author    TSR Berry <20988865+TSRBerry@users.noreply.github.com>    2023-04-08 01:22:00 +0200
committer Mary <thog@protonmail.com>    2023-04-27 23:51:14 +0200
commit    cee712105850ac3385cd0091a923438167433f9f (patch)
tree      4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/ARMeilleure
parent    cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff)
Move solution and projects to src
Diffstat (limited to 'src/ARMeilleure')
-rw-r--r--  src/ARMeilleure/ARMeilleure.csproj  26
-rw-r--r--  src/ARMeilleure/Allocators.cs  42
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs  270
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs  47
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs  14
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs  11
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/Assembler.cs  1160
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs  96
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs  91
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs  287
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs  1580
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs  662
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs  185
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs  14
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs  463
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs  59
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs  892
-rw-r--r--  src/ARMeilleure/CodeGen/CompiledFunction.cs  68
-rw-r--r--  src/ARMeilleure/CodeGen/Linking/RelocEntry.cs  38
-rw-r--r--  src/ARMeilleure/CodeGen/Linking/RelocInfo.cs  32
-rw-r--r--  src/ARMeilleure/CodeGen/Linking/Symbol.cs  99
-rw-r--r--  src/ARMeilleure/CodeGen/Linking/SymbolType.cs  28
-rw-r--r--  src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs  72
-rw-r--r--  src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs  346
-rw-r--r--  src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs  252
-rw-r--r--  src/ARMeilleure/CodeGen/Optimizations/Simplification.cs  183
-rw-r--r--  src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs  83
-rw-r--r--  src/ARMeilleure/CodeGen/PreAllocatorCommon.cs  57
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs  19
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs  259
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs  454
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs  12
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs  1101
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs  396
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs  40
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs  74
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs  50
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs  25
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs  84
-rw-r--r--  src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs  16
-rw-r--r--  src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs  11
-rw-r--r--  src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs  20
-rw-r--r--  src/ARMeilleure/CodeGen/X86/Assembler.cs  1559
-rw-r--r--  src/ARMeilleure/CodeGen/X86/AssemblerTable.cs  295
-rw-r--r--  src/ARMeilleure/CodeGen/X86/CallConvName.cs  8
-rw-r--r--  src/ARMeilleure/CodeGen/X86/CallingConvention.cs  158
-rw-r--r--  src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs  19
-rw-r--r--  src/ARMeilleure/CodeGen/X86/CodeGenContext.cs  105
-rw-r--r--  src/ARMeilleure/CodeGen/X86/CodeGenerator.cs  1865
-rw-r--r--  src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs  144
-rw-r--r--  src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs  14
-rw-r--r--  src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs  200
-rw-r--r--  src/ARMeilleure/CodeGen/X86/IntrinsicType.cs  18
-rw-r--r--  src/ARMeilleure/CodeGen/X86/Mxcsr.cs  15
-rw-r--r--  src/ARMeilleure/CodeGen/X86/PreAllocator.cs  796
-rw-r--r--  src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs  334
-rw-r--r--  src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs  327
-rw-r--r--  src/ARMeilleure/CodeGen/X86/X86Condition.cs  47
-rw-r--r--  src/ARMeilleure/CodeGen/X86/X86Instruction.cs  231
-rw-r--r--  src/ARMeilleure/CodeGen/X86/X86Optimizer.cs  259
-rw-r--r--  src/ARMeilleure/CodeGen/X86/X86Register.cs  41
-rw-r--r--  src/ARMeilleure/Common/AddressTable.cs  252
-rw-r--r--  src/ARMeilleure/Common/Allocator.cs  24
-rw-r--r--  src/ARMeilleure/Common/ArenaAllocator.cs  187
-rw-r--r--  src/ARMeilleure/Common/BitMap.cs  222
-rw-r--r--  src/ARMeilleure/Common/BitUtils.cs  57
-rw-r--r--  src/ARMeilleure/Common/Counter.cs  98
-rw-r--r--  src/ARMeilleure/Common/EntryTable.cs  188
-rw-r--r--  src/ARMeilleure/Common/EnumUtils.cs  12
-rw-r--r--  src/ARMeilleure/Common/NativeAllocator.cs  27
-rw-r--r--  src/ARMeilleure/Decoders/Block.cs  101
-rw-r--r--  src/ARMeilleure/Decoders/Condition.cs  32
-rw-r--r--  src/ARMeilleure/Decoders/DataOp.cs  10
-rw-r--r--  src/ARMeilleure/Decoders/Decoder.cs  391
-rw-r--r--  src/ARMeilleure/Decoders/DecoderHelper.cs  167
-rw-r--r--  src/ARMeilleure/Decoders/DecoderMode.cs  9
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode.cs  17
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32.cs  9
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32Adr.cs  9
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32Alu.cs  8
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32AluBf.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32AluImm.cs  9
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32AluImm16.cs  7
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32AluMla.cs  11
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32AluReg.cs  7
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32AluRsImm.cs  10
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32AluRsReg.cs  10
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32AluUmull.cs  13
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32AluUx.cs  8
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32BImm.cs  4
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32BReg.cs  7
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32Exception.cs  7
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32HasSetFlags.cs  7
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32Mem.cs  16
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32MemEx.cs  7
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32MemMult.cs  15
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32MemReg.cs  7
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32MemRsImm.cs  8
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32Simd.cs  4
-rw-r--r--  src/ARMeilleure/Decoders/IOpCode32SimdImm.cs  9
-rw-r--r--  src/ARMeilleure/Decoders/IOpCodeAlu.cs  10
-rw-r--r--  src/ARMeilleure/Decoders/IOpCodeAluImm.cs  7
-rw-r--r--  src/ARMeilleure/Decoders/IOpCodeAluRs.cs  10
-rw-r--r--  src/ARMeilleure/Decoders/IOpCodeAluRx.cs  10
-rw-r--r--  src/ARMeilleure/Decoders/IOpCodeBImm.cs  7
-rw-r--r--  src/ARMeilleure/Decoders/IOpCodeCond.cs  7
-rw-r--r--  src/ARMeilleure/Decoders/IOpCodeLit.cs  11
-rw-r--r--  src/ARMeilleure/Decoders/IOpCodeSimd.cs  7
-rw-r--r--  src/ARMeilleure/Decoders/InstDescriptor.cs  18
-rw-r--r--  src/ARMeilleure/Decoders/InstEmitter.cs  6
-rw-r--r--  src/ARMeilleure/Decoders/IntType.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/OpCode.cs  49
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32.cs  34
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32Alu.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32AluBf.cs  22
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32AluImm.cs  23
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32AluImm16.cs  17
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32AluMla.cs  30
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32AluReg.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32AluRsImm.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32AluRsReg.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32AluUmull.cs  30
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32AluUx.cs  18
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32BImm.cs  29
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32BReg.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32Exception.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32Mem.cs  39
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32MemImm.cs  12
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32MemImm8.cs  15
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32MemLdEx.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32MemMult.cs  52
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32MemReg.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32MemRsImm.cs  18
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32MemStEx.cs  15
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32Mrs.cs  16
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32MsrReg.cs  29
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32Sat.cs  24
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32Sat16.cs  18
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32Simd.cs  33
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdBase.cs  55
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdBinary.cs  21
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs  18
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs  24
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs  44
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdDupElem.cs  43
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdDupGP.cs  36
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdExt.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdImm.cs  38
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdImm44.cs  41
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdLong.cs  30
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdMemImm.cs  40
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdMemMult.cs  76
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdMemPair.cs  50
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs  51
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdMovGp.cs  31
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs  36
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs  51
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdMovn.cs  13
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdReg.cs  25
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdRegElem.cs  31
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs  22
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdRegLong.cs  24
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdRegS.cs  23
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdRegWide.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdRev.cs  23
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdS.cs  39
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdSel.cs  23
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdShImm.cs  46
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs  43
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs  10
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdSpecial.cs  19
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdSqrte.cs  19
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32SimdTbl.cs  24
-rw-r--r--  src/ARMeilleure/Decoders/OpCode32System.cs  28
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeAdr.cs  19
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeAlu.cs  23
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeAluBinary.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeAluImm.cs  40
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeAluRs.cs  29
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeAluRx.cs  19
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeBImm.cs  11
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeBImmAl.cs  12
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeBImmCmp.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeBImmCond.cs  25
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeBImmTest.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeBReg.cs  24
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeBfm.cs  29
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeCcmp.cs  32
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeCcmpImm.cs  11
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeCcmpReg.cs  15
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeCsel.cs  17
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeException.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeMem.cs  19
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeMemEx.cs  16
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeMemImm.cs  53
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeMemLit.cs  28
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeMemPair.cs  25
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeMemReg.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeMov.cs  38
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeMul.cs  16
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimd.cs  24
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdCvt.cs  21
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdExt.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdFcond.cs  17
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdFmov.cs  32
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdHelper.cs  88
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdImm.cs  107
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdIns.cs  36
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdMemImm.cs  28
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdMemLit.cs  31
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdMemMs.cs  48
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdMemPair.cs  16
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdMemReg.cs  21
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdMemSs.cs  97
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdReg.cs  18
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdRegElem.cs  31
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs  33
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdShImm.cs  18
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSimdTbl.cs  12
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeSystem.cs  24
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16.cs  15
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16AddSubImm3.cs  24
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16AddSubReg.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16AddSubSp.cs  23
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16Adr.cs  19
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16AluImm8.cs  24
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16AluImmZero.cs  24
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16AluRegHigh.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16AluRegLow.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16AluUx.cs  22
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16BImm11.cs  15
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16BImm8.cs  17
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16BImmCmp.cs  19
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16BReg.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16Exception.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16IfThen.cs  33
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16MemImm5.cs  58
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16MemLit.cs  26
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16MemMult.cs  34
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16MemReg.cs  27
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16MemSp.cs  28
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16MemStack.cs  42
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16ShiftImm.cs  24
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16ShiftReg.cs  27
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT16SpRel.cs  24
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32.cs  15
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32Alu.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32AluBf.cs  22
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32AluImm.cs  38
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32AluImm12.cs  16
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32AluMla.cs  29
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32AluReg.cs  14
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32AluRsImm.cs  20
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32AluUmull.cs  28
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32AluUx.cs  18
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32BImm20.cs  27
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32BImm24.cs  35
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32MemImm12.cs  25
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32MemImm8.cs  29
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32MemImm8D.cs  31
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32MemLdEx.cs  26
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32MemMult.cs  52
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32MemRsImm.cs  30
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32MemStEx.cs  27
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32MovImm16.cs  16
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32ShiftReg.cs  19
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeT32Tb.cs  16
-rw-r--r--  src/ARMeilleure/Decoders/OpCodeTable.cs  1509
-rw-r--r--  src/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs  88
-rw-r--r--  src/ARMeilleure/Decoders/RegisterSize.cs  10
-rw-r--r--  src/ARMeilleure/Decoders/ShiftType.cs  10
-rw-r--r--  src/ARMeilleure/Diagnostics/IRDumper.cs  311
-rw-r--r--  src/ARMeilleure/Diagnostics/Logger.cs  56
-rw-r--r--  src/ARMeilleure/Diagnostics/PassName.cs  19
-rw-r--r--  src/ARMeilleure/Diagnostics/Symbols.cs  84
-rw-r--r--  src/ARMeilleure/Diagnostics/TranslatorEventSource.cs  67
-rw-r--r--  src/ARMeilleure/Instructions/CryptoHelper.cs  280
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitAlu.cs  400
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitAlu32.cs  931
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitAluHelper.cs  613
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitBfm.cs  196
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitCcmp.cs  61
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitCsel.cs  53
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitDiv.cs  67
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitException.cs  55
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitException32.cs  39
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitFlow.cs  107
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitFlow32.cs  136
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitFlowHelper.cs  240
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitHash.cs  69
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitHash32.cs  53
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitHashHelper.cs  118
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitHelper.cs  264
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMemory.cs  184
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMemory32.cs  265
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMemoryEx.cs  178
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs  237
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs  174
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs  648
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMove.cs  41
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMul.cs  100
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitMul32.cs  379
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs  5224
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs  1703
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdCmp.cs  799
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs  437
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs  99
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs  99
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdCvt.cs  1891
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs  800
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHash.cs  147
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHash32.cs  64
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs  56
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHelper.cs  2088
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs  1286
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs  366
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs  720
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdLogical.cs  612
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs  266
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdMemory.cs  160
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs  352
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdMove.cs  850
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdMove32.cs  656
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdShift.cs  1827
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdShift32.cs  389
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSystem.cs  248
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSystem32.cs  351
-rw-r--r--  src/ARMeilleure/Instructions/InstName.cs  685
-rw-r--r--  src/ARMeilleure/Instructions/NativeInterface.cs  195
-rw-r--r--  src/ARMeilleure/Instructions/SoftFallback.cs  624
-rw-r--r--  src/ARMeilleure/Instructions/SoftFloat.cs  3480
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/BasicBlock.cs  159
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/BasicBlockFrequency.cs  8
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/Comparison.cs  24
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/IIntrusiveListNode.cs  8
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/Instruction.cs  72
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/Intrinsic.cs  636
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/IntrusiveList.cs  208
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs  54
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/Multiplier.cs  11
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/Operand.cs  594
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/OperandKind.cs  13
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/OperandType.cs  65
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/Operation.cs  376
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/PhiOperation.cs  37
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/Register.cs  43
-rw-r--r--  src/ARMeilleure/IntermediateRepresentation/RegisterType.cs  10
-rw-r--r--  src/ARMeilleure/Memory/IJitMemoryAllocator.cs  10
-rw-r--r--  src/ARMeilleure/Memory/IJitMemoryBlock.cs  14
-rw-r--r--  src/ARMeilleure/Memory/IMemoryManager.cs  77
-rw-r--r--  src/ARMeilleure/Memory/InvalidAccessException.cs  23
-rw-r--r--  src/ARMeilleure/Memory/MemoryManagerType.cs  41
-rw-r--r--  src/ARMeilleure/Memory/ReservedRegion.cs  58
-rw-r--r--  src/ARMeilleure/Native/JitSupportDarwin.cs  13
-rw-r--r--  src/ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib  bin 0 -> 33564 bytes
-rw-r--r--  src/ARMeilleure/Native/macos_jit_support/Makefile  8
-rw-r--r--  src/ARMeilleure/Native/macos_jit_support/support.c  14
-rw-r--r--  src/ARMeilleure/Optimizations.cs  68
-rw-r--r--  src/ARMeilleure/Signal/NativeSignalHandler.cs  422
-rw-r--r--  src/ARMeilleure/Signal/TestMethods.cs  84
-rw-r--r--  src/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs  83
-rw-r--r--  src/ARMeilleure/Signal/WindowsPartialUnmapHandler.cs  186
-rw-r--r--  src/ARMeilleure/Signal/WindowsSignalHandlerRegistration.cs  44
-rw-r--r--  src/ARMeilleure/State/Aarch32Mode.cs  15
-rw-r--r--  src/ARMeilleure/State/ExceptionCallback.cs  5
-rw-r--r--  src/ARMeilleure/State/ExecutionContext.cs  173
-rw-r--r--  src/ARMeilleure/State/ExecutionMode.cs  9
-rw-r--r--  src/ARMeilleure/State/FPCR.cs  22
-rw-r--r--  src/ARMeilleure/State/FPException.cs  12
-rw-r--r--  src/ARMeilleure/State/FPRoundingMode.cs  11
-rw-r--r--  src/ARMeilleure/State/FPSCR.cs  15
-rw-r--r--  src/ARMeilleure/State/FPSR.cs  18
-rw-r--r--  src/ARMeilleure/State/FPState.cs  31
-rw-r--r--  src/ARMeilleure/State/FPType.cs  11
-rw-r--r--  src/ARMeilleure/State/ICounter.cs  18
-rw-r--r--  src/ARMeilleure/State/NativeContext.cs  269
-rw-r--r--  src/ARMeilleure/State/PState.cs  17
-rw-r--r--  src/ARMeilleure/State/RegisterAlias.cs  42
-rw-r--r--  src/ARMeilleure/State/RegisterConsts.cs  15
-rw-r--r--  src/ARMeilleure/State/V128.cs  312
-rw-r--r--  src/ARMeilleure/Statistics.cs  94
-rw-r--r--  src/ARMeilleure/Translation/ArmEmitterContext.cs  282
-rw-r--r--  src/ARMeilleure/Translation/Cache/CacheEntry.cs  26
-rw-r--r--  src/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs  96
-rw-r--r--  src/ARMeilleure/Translation/Cache/JitCache.cs  198
-rw-r--r--  src/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs  79
-rw-r--r--  src/ARMeilleure/Translation/Cache/JitUnwindWindows.cs  189
-rw-r--r--  src/ARMeilleure/Translation/Compiler.cs  68
-rw-r--r--  src/ARMeilleure/Translation/CompilerContext.cs  26
-rw-r--r--  src/ARMeilleure/Translation/CompilerOptions.cs  17
-rw-r--r--  src/ARMeilleure/Translation/ControlFlowGraph.cs  155
-rw-r--r--  src/ARMeilleure/Translation/DelegateHelper.cs  104
-rw-r--r--  src/ARMeilleure/Translation/DelegateInfo.cs  19
-rw-r--r--  src/ARMeilleure/Translation/Delegates.cs  261
-rw-r--r--  src/ARMeilleure/Translation/DispatcherFunction.cs  7
-rw-r--r--  src/ARMeilleure/Translation/Dominance.cs  95
-rw-r--r--  src/ARMeilleure/Translation/EmitterContext.cs  680
-rw-r--r--  src/ARMeilleure/Translation/GuestFunction.cs  6
-rw-r--r--  src/ARMeilleure/Translation/IntervalTree.cs  745
-rw-r--r--  src/ARMeilleure/Translation/PTC/EncodingCache.cs  9
-rw-r--r--  src/ARMeilleure/Translation/PTC/IPtcLoadState.cs  10
-rw-r--r--  src/ARMeilleure/Translation/PTC/Ptc.cs  1131
-rw-r--r--  src/ARMeilleure/Translation/PTC/PtcFormatter.cs  179
-rw-r--r--  src/ARMeilleure/Translation/PTC/PtcLoadingState.cs  9
-rw-r--r--  src/ARMeilleure/Translation/PTC/PtcProfiler.cs  421
-rw-r--r--  src/ARMeilleure/Translation/PTC/PtcState.cs  10
-rw-r--r--  src/ARMeilleure/Translation/RegisterToLocal.cs  52
-rw-r--r--  src/ARMeilleure/Translation/RegisterUsage.cs  394
-rw-r--r--  src/ARMeilleure/Translation/RejitRequest.cs  16
-rw-r--r--  src/ARMeilleure/Translation/SsaConstruction.cs  289
-rw-r--r--  src/ARMeilleure/Translation/SsaDeconstruction.cs  48
-rw-r--r--  src/ARMeilleure/Translation/TranslatedFunction.cs  34
-rw-r--r--  src/ARMeilleure/Translation/Translator.cs  576
-rw-r--r--  src/ARMeilleure/Translation/TranslatorCache.cs  95
-rw-r--r--  src/ARMeilleure/Translation/TranslatorQueue.cs  121
-rw-r--r--  src/ARMeilleure/Translation/TranslatorStubs.cs  312
-rw-r--r--  src/ARMeilleure/Translation/TranslatorTestMethods.cs  148
417 files changed, 68496 insertions, 0 deletions
diff --git a/src/ARMeilleure/ARMeilleure.csproj b/src/ARMeilleure/ARMeilleure.csproj
new file mode 100644
index 00000000..fa555115
--- /dev/null
+++ b/src/ARMeilleure/ARMeilleure.csproj
@@ -0,0 +1,26 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+ <PropertyGroup>
+ <TargetFramework>net7.0</TargetFramework>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
+ <ProjectReference Include="..\Ryujinx.Memory\Ryujinx.Memory.csproj" />
+ </ItemGroup>
+
+ <ItemGroup>
+ <ContentWithTargetPath Include="Native\libs\libarmeilleure-jitsupport.dylib" Condition="'$(RuntimeIdentifier)' == '' OR '$(RuntimeIdentifier)' == 'osx-arm64'">
+ <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+ <TargetPath>libarmeilleure-jitsupport.dylib</TargetPath>
+ </ContentWithTargetPath>
+ </ItemGroup>
+
+ <ItemGroup>
+ <AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
+ <_Parameter1>Ryujinx.Tests</_Parameter1>
+ </AssemblyAttribute>
+ </ItemGroup>
+
+</Project>
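
Note: the AssemblyAttribute item group above is a build-time way of emitting an assembly-level attribute. It is conceptually equivalent to hand-writing the following in a C# source file (illustrative sketch, not part of this commit; the SDK generates it into the auto-created AssemblyInfo.cs):

    // Equivalent hand-written form of the generated attribute:
    using System.Runtime.CompilerServices;

    [assembly: InternalsVisibleTo("Ryujinx.Tests")]

This lets the Ryujinx.Tests project exercise ARMeilleure's internal types without making them public.
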
diff --git a/src/ARMeilleure/Allocators.cs b/src/ARMeilleure/Allocators.cs
new file mode 100644
index 00000000..deabf9a2
--- /dev/null
+++ b/src/ARMeilleure/Allocators.cs
@@ -0,0 +1,42 @@
+using ARMeilleure.Common;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure
+{
+ static class Allocators
+ {
+ [ThreadStatic] private static ArenaAllocator _default;
+ [ThreadStatic] private static ArenaAllocator _operands;
+ [ThreadStatic] private static ArenaAllocator _operations;
+ [ThreadStatic] private static ArenaAllocator _references;
+ [ThreadStatic] private static ArenaAllocator _liveRanges;
+ [ThreadStatic] private static ArenaAllocator _liveIntervals;
+
+ public static ArenaAllocator Default => GetAllocator(ref _default, 256 * 1024, 4);
+ public static ArenaAllocator Operands => GetAllocator(ref _operands, 64 * 1024, 8);
+ public static ArenaAllocator Operations => GetAllocator(ref _operations, 64 * 1024, 8);
+ public static ArenaAllocator References => GetAllocator(ref _references, 64 * 1024, 8);
+ public static ArenaAllocator LiveRanges => GetAllocator(ref _liveRanges, 64 * 1024, 8);
+ public static ArenaAllocator LiveIntervals => GetAllocator(ref _liveIntervals, 64 * 1024, 8);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static ArenaAllocator GetAllocator(ref ArenaAllocator alloc, uint pageSize, uint pageCount)
+ {
+ if (alloc == null)
+ {
+ alloc = new ArenaAllocator(pageSize, pageCount);
+ }
+
+ return alloc;
+ }
+
+ public static void ResetAll()
+ {
+ Default.Reset();
+ Operands.Reset();
+ Operations.Reset();
+ References.Reset();
+ }
+ }
+}
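
Aside: the [ThreadStatic] plus lazy-init pattern above gives each JIT worker thread its own arena allocators without any locking; note also that ResetAll() recycles only the first four arenas, leaving the live-range and live-interval arenas untouched here. A minimal standalone demonstration of the per-thread lazy-init pattern (illustrative only, not part of this commit; all names here are hypothetical):

    using System;
    using System.Threading;

    static class ThreadLocalDemo
    {
        // Like Allocators._default: one instance per thread, created on first use.
        [ThreadStatic] private static object _instance;

        private static object Instance => _instance ??= new object();

        static void Main()
        {
            object onMainThread = Instance;
            object onWorker = null;

            var worker = new Thread(() => onWorker = Instance);
            worker.Start();
            worker.Join();

            // Prints False: each thread lazily created its own instance.
            Console.WriteLine(ReferenceEquals(onMainThread, onWorker));
        }
    }
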
diff --git a/src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs b/src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
new file mode 100644
index 00000000..fdd4d024
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
@@ -0,0 +1,270 @@
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class Arm64Optimizer
+ {
+ private const int MaxConstantUses = 10000;
+
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+ var constants = new Dictionary<ulong, Operand>();
+
+ Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
+ {
+ // If the constant has many uses, we also force a new constant mov to be added, in order
+ // to avoid overflow of the counts field (that is limited to 16 bits).
+ if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses)
+ {
+ constant = Local(source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, constant, source);
+
+ block.Operations.AddBefore(operation, copyOp);
+
+ constants[source.Value] = constant;
+ }
+
+ return constant;
+ }
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ constants.Clear();
+
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ // Insert copies for constants that can't fit on a 32-bit immediate.
+ // Doing this early unblocks a few optimizations.
+ if (node.Instruction == Instruction.Add)
+ {
+ Operand src1 = node.GetSource(0);
+ Operand src2 = node.GetSource(1);
+
+ if (src1.Kind == OperandKind.Constant && (src1.Relocatable || ConstTooLong(src1, OperandType.I32)))
+ {
+ node.SetSource(0, GetConstantCopy(block, node, src1));
+ }
+
+ if (src2.Kind == OperandKind.Constant && (src2.Relocatable || ConstTooLong(src2, OperandType.I32)))
+ {
+ node.SetSource(1, GetConstantCopy(block, node, src2));
+ }
+ }
+
+ // Try to fold something like:
+ // lsl x1, x1, #2
+ // add x0, x0, x1
+ // ldr x0, [x0]
+ // add x2, x2, #16
+ // ldr x2, [x2]
+ // Into:
+ // ldr x0, [x0, x1, lsl #2]
+ // ldr x2, [x2, #16]
+ if (IsMemoryLoadOrStore(node.Instruction))
+ {
+ OperandType type;
+
+ if (node.Destination != default)
+ {
+ type = node.Destination.Type;
+ }
+ else
+ {
+ type = node.GetSource(1).Type;
+ }
+
+ Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type);
+
+ if (memOp != default)
+ {
+ node.SetSource(0, memOp);
+ }
+ }
+ }
+ }
+
+ Optimizer.RemoveUnusedNodes(cfg);
+ }
+
+ private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
+ {
+ Operand baseOp = addr;
+
+ // First we check if the address is the result of a local X with immediate
+ // addition. If that is the case, then the baseOp is X, and the memory operand immediate
+ // becomes the addition immediate. Otherwise baseOp keeps being the address.
+ int imm = GetConstOp(ref baseOp, type);
+ if (imm != 0)
+ {
+ return MemoryOp(type, baseOp, default, Multiplier.x1, imm);
+ }
+
+ // Now we check if the baseOp is the result of a local Y with a local Z addition.
+ // If that is the case, we now set baseOp to Y and indexOp to Z. We further check
+ // if Z is the result of a left shift of local W by a value == 0 or == Log2(AccessSize),
+ // if that is the case, we set indexOp to W and adjust the scale value of the memory operand
+ // to match that of the left shift.
+ // There is one missed case, which is the address being a shift result, but this is
+ // probably not worth optimizing as it should never happen.
+ (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp, type);
+
+ // If baseOp is still equal to address, then there's nothing that can be optimized.
+ if (baseOp == addr)
+ {
+ return default;
+ }
+
+ return MemoryOp(type, baseOp, indexOp, scale, 0);
+ }
+
+ private static int GetConstOp(ref Operand baseOp, OperandType accessType)
+ {
+ Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+ if (operation == default)
+ {
+ return 0;
+ }
+
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ Operand constOp;
+ Operand otherOp;
+
+ if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable)
+ {
+ constOp = src1;
+ otherOp = src2;
+ }
+ else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant)
+ {
+ constOp = src2;
+ otherOp = src1;
+ }
+ else
+ {
+ return 0;
+ }
+
+ // If we have addition by a constant that we can't encode on the instruction,
+ // then we can't optimize it further.
+ if (ConstTooLong(constOp, accessType))
+ {
+ return 0;
+ }
+
+ baseOp = otherOp;
+
+ return constOp.AsInt32();
+ }
+
+ private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp, OperandType accessType)
+ {
+ Operand indexOp = default;
+
+ Multiplier scale = Multiplier.x1;
+
+ Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+ if (addOp == default)
+ {
+ return (indexOp, scale);
+ }
+
+ Operand src1 = addOp.GetSource(0);
+ Operand src2 = addOp.GetSource(1);
+
+ if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable)
+ {
+ return (indexOp, scale);
+ }
+
+ baseOp = src1;
+ indexOp = src2;
+
+ Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft);
+
+ bool indexOnSrc2 = false;
+
+ if (shlOp == default)
+ {
+ shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft);
+
+ indexOnSrc2 = true;
+ }
+
+ if (shlOp != default)
+ {
+ Operand shSrc = shlOp.GetSource(0);
+ Operand shift = shlOp.GetSource(1);
+
+ int maxShift = Assembler.GetScaleForType(accessType);
+
+ if (shSrc.Kind == OperandKind.LocalVariable &&
+ shift.Kind == OperandKind.Constant &&
+ (shift.Value == 0 || shift.Value == (ulong)maxShift))
+ {
+ scale = shift.Value switch
+ {
+ 1 => Multiplier.x2,
+ 2 => Multiplier.x4,
+ 3 => Multiplier.x8,
+ 4 => Multiplier.x16,
+ _ => Multiplier.x1
+ };
+
+ baseOp = indexOnSrc2 ? src1 : src2;
+ indexOp = shSrc;
+ }
+ }
+
+ return (indexOp, scale);
+ }
+
+ private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
+ {
+ // If we have multiple assignments, folding is not safe
+ // as the value may be different depending on the
+ // control flow path.
+ if (op.AssignmentsCount != 1)
+ {
+ return default;
+ }
+
+ Operation asgOp = op.Assignments[0];
+
+ if (asgOp.Instruction != inst)
+ {
+ return default;
+ }
+
+ return asgOp;
+ }
+
+ private static bool IsMemoryLoadOrStore(Instruction inst)
+ {
+ return inst == Instruction.Load || inst == Instruction.Store;
+ }
+
+ private static bool ConstTooLong(Operand constOp, OperandType accessType)
+ {
+ if ((uint)constOp.Value != constOp.Value)
+ {
+ return true;
+ }
+
+ return !CodeGenCommon.ConstFitsOnUImm12(constOp.AsInt32(), accessType);
+ }
+ }
+}
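
To make the folding rule above concrete: GetIndexOp only accepts a shift amount of 0 or Log2 of the access size, because AArch64 register-offset loads and stores can scale the index solely by the access width. A standalone sketch of that check (illustrative only, not part of this commit; it mirrors the constraint the pass verifies via Assembler.GetScaleForType):

    using System;

    static class ScaleFoldDemo
    {
        // The shift folded into [base, index, lsl #n] must be 0 or
        // log2(access size in bytes).
        static int MaxShift(int accessSizeBytes) => (int)Math.Log2(accessSizeBytes);

        static bool CanFold(int shiftAmount, int accessSizeBytes) =>
            shiftAmount == 0 || shiftAmount == MaxShift(accessSizeBytes);

        static void Main()
        {
            Console.WriteLine(CanFold(3, 8)); // True:  ldr x0, [x0, x1, lsl #3]
            Console.WriteLine(CanFold(2, 8)); // False: the lsl stays a separate op
        }
    }
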
diff --git a/src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs b/src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
new file mode 100644
index 00000000..db27a810
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
@@ -0,0 +1,47 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ enum ArmCondition
+ {
+ Eq = 0,
+ Ne = 1,
+ GeUn = 2,
+ LtUn = 3,
+ Mi = 4,
+ Pl = 5,
+ Vs = 6,
+ Vc = 7,
+ GtUn = 8,
+ LeUn = 9,
+ Ge = 10,
+ Lt = 11,
+ Gt = 12,
+ Le = 13,
+ Al = 14,
+ Nv = 15
+ }
+
+ static class ComparisonArm64Extensions
+ {
+ public static ArmCondition ToArmCondition(this Comparison comp)
+ {
+ return comp switch
+ {
+ Comparison.Equal => ArmCondition.Eq,
+ Comparison.NotEqual => ArmCondition.Ne,
+ Comparison.Greater => ArmCondition.Gt,
+ Comparison.LessOrEqual => ArmCondition.Le,
+ Comparison.GreaterUI => ArmCondition.GtUn,
+ Comparison.LessOrEqualUI => ArmCondition.LeUn,
+ Comparison.GreaterOrEqual => ArmCondition.Ge,
+ Comparison.Less => ArmCondition.Lt,
+ Comparison.GreaterOrEqualUI => ArmCondition.GeUn,
+ Comparison.LessUI => ArmCondition.LtUn,
+
+ _ => throw new ArgumentException(null, nameof(comp))
+ };
+ }
+ }
+}
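
The encoding above pairs each condition with its logical inverse on adjacent values (Eq 0 and Ne 1, GeUn 2 and LtUn 3, and so on; Al/Nv are the exception and not meaningfully invertible), so flipping the low bit negates a condition. The Assembler relies on this later, where Cset emits Csinc with (ArmCondition)((int)condition ^ 1). A one-line sketch (illustrative only, not part of this commit):

    // Invert(ArmCondition.Eq) == ArmCondition.Ne; Invert(ArmCondition.Lt) == ArmCondition.Ge
    static ArmCondition Invert(ArmCondition condition) => (ArmCondition)((int)condition ^ 1);
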
diff --git a/src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs b/src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
new file mode 100644
index 00000000..062a6d0b
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+ enum ArmExtensionType
+ {
+ Uxtb = 0,
+ Uxth = 1,
+ Uxtw = 2,
+ Uxtx = 3,
+ Sxtb = 4,
+ Sxth = 5,
+ Sxtw = 6,
+ Sxtx = 7
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs b/src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
new file mode 100644
index 00000000..d223a146
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
@@ -0,0 +1,11 @@
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ enum ArmShiftType
+ {
+ Lsl = 0,
+ Lsr = 1,
+ Asr = 2,
+ Ror = 3
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/Assembler.cs b/src/ARMeilleure/CodeGen/Arm64/Assembler.cs
new file mode 100644
index 00000000..0ec0be7c
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/Assembler.cs
@@ -0,0 +1,1160 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+using System.IO;
+using static ARMeilleure.IntermediateRepresentation.Operand;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ class Assembler
+ {
+ public const uint SfFlag = 1u << 31;
+
+ private const int SpRegister = 31;
+ private const int ZrRegister = 31;
+
+ private readonly Stream _stream;
+
+ public Assembler(Stream stream)
+ {
+ _stream = stream;
+ }
+
+ public void Add(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
+ {
+ WriteInstructionAuto(0x0b200000u, rd, rn, rm, extensionType, shiftAmount);
+ }
+
+ public void Add(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0, bool immForm = false)
+ {
+ WriteInstructionAuto(0x11000000u, 0x0b000000u, rd, rn, rm, shiftType, shiftAmount, immForm);
+ }
+
+ public void And(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x12000000u, 0x0a000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Ands(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x72000000u, 0x6a000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Asr(Operand rd, Operand rn, Operand rm)
+ {
+ if (rm.Kind == OperandKind.Constant)
+ {
+ int shift = rm.AsInt32();
+ int mask = rd.Type == OperandType.I64 ? 63 : 31;
+ shift &= mask;
+ Sbfm(rd, rn, shift, mask);
+ }
+ else
+ {
+ Asrv(rd, rn, rm);
+ }
+ }
+
+ public void Asrv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionBitwiseAuto(0x1ac02800u, rd, rn, rm);
+ }
+
+ public void B(int imm)
+ {
+ WriteUInt32(0x14000000u | EncodeSImm26_2(imm));
+ }
+
+ public void B(ArmCondition condition, int imm)
+ {
+ WriteUInt32(0x54000000u | (uint)condition | (EncodeSImm19_2(imm) << 5));
+ }
+
+ public void Blr(Operand rn)
+ {
+ WriteUInt32(0xd63f0000u | (EncodeReg(rn) << 5));
+ }
+
+ public void Br(Operand rn)
+ {
+ WriteUInt32(0xd61f0000u | (EncodeReg(rn) << 5));
+ }
+
+ public void Brk()
+ {
+ WriteUInt32(0xd4200000u);
+ }
+
+ public void Cbz(Operand rt, int imm)
+ {
+ WriteInstructionAuto(0x34000000u | (EncodeSImm19_2(imm) << 5), rt);
+ }
+
+ public void Cbnz(Operand rt, int imm)
+ {
+ WriteInstructionAuto(0x35000000u | (EncodeSImm19_2(imm) << 5), rt);
+ }
+
+ public void Clrex(int crm = 15)
+ {
+ WriteUInt32(0xd503305fu | (EncodeUImm4(crm) << 8));
+ }
+
+ public void Clz(Operand rd, Operand rn)
+ {
+ WriteInstructionAuto(0x5ac01000u, rd, rn);
+ }
+
+ public void CmeqVector(Operand rd, Operand rn, Operand rm, int size, bool q = true)
+ {
+ Debug.Assert((uint)size < 4);
+ WriteSimdInstruction(0x2e208c00u | ((uint)size << 22), rd, rn, rm, q);
+ }
+
+ public void Cmp(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ Subs(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Csel(Operand rd, Operand rn, Operand rm, ArmCondition condition)
+ {
+ WriteInstructionBitwiseAuto(0x1a800000u | ((uint)condition << 12), rd, rn, rm);
+ }
+
+ public void Cset(Operand rd, ArmCondition condition)
+ {
+ var zr = Factory.Register(ZrRegister, RegisterType.Integer, rd.Type);
+ Csinc(rd, zr, zr, (ArmCondition)((int)condition ^ 1));
+ }
+
+ public void Csinc(Operand rd, Operand rn, Operand rm, ArmCondition condition)
+ {
+ WriteInstructionBitwiseAuto(0x1a800400u | ((uint)condition << 12), rd, rn, rm);
+ }
+
+ public void Dmb(uint option)
+ {
+ WriteUInt32(0xd50330bfu | (option << 8));
+ }
+
+ public void DupScalar(Operand rd, Operand rn, int index, int size)
+ {
+ WriteInstruction(0x5e000400u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn);
+ }
+
+ public void Eor(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x52000000u, 0x4a000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void EorVector(Operand rd, Operand rn, Operand rm, bool q = true)
+ {
+ WriteSimdInstruction(0x2e201c00u, rd, rn, rm, q);
+ }
+
+ public void Extr(Operand rd, Operand rn, Operand rm, int imms)
+ {
+ uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+ WriteInstructionBitwiseAuto(0x13800000u | n | (EncodeUImm6(imms) << 10), rd, rn, rm);
+ }
+
+ public void FaddScalar(Operand rd, Operand rn, Operand rm)
+ {
+ WriteFPInstructionAuto(0x1e202800u, rd, rn, rm);
+ }
+
+ public void FcvtScalar(Operand rd, Operand rn)
+ {
+ uint instruction = 0x1e224000u | (rd.Type == OperandType.FP64 ? 1u << 15 : 1u << 22);
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+
+ public void FdivScalar(Operand rd, Operand rn, Operand rm)
+ {
+ WriteFPInstructionAuto(0x1e201800u, rd, rn, rm);
+ }
+
+ public void Fmov(Operand rd, Operand rn)
+ {
+ WriteFPInstructionAuto(0x1e204000u, rd, rn);
+ }
+
+ public void Fmov(Operand rd, Operand rn, bool topHalf)
+ {
+ Debug.Assert(rd.Type.IsInteger() != rn.Type.IsInteger());
+ Debug.Assert(rd.Type == OperandType.I64 || rn.Type == OperandType.I64 || !topHalf);
+
+ uint opcode = rd.Type.IsInteger() ? 0b110u : 0b111u;
+
+ uint rmode = topHalf ? 1u << 19 : 0u;
+ uint ftype = rd.Type == OperandType.FP64 || rn.Type == OperandType.FP64 ? 1u << 22 : 0u;
+ uint sf = rd.Type == OperandType.I64 || rn.Type == OperandType.I64 ? SfFlag : 0u;
+
+ WriteUInt32(0x1e260000u | (opcode << 16) | rmode | ftype | sf | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+
+ public void FmulScalar(Operand rd, Operand rn, Operand rm)
+ {
+ WriteFPInstructionAuto(0x1e200800u, rd, rn, rm);
+ }
+
+ public void FnegScalar(Operand rd, Operand rn)
+ {
+ WriteFPInstructionAuto(0x1e214000u, rd, rn);
+ }
+
+ public void FsubScalar(Operand rd, Operand rn, Operand rm)
+ {
+ WriteFPInstructionAuto(0x1e203800u, rd, rn, rm);
+ }
+
+ public void Ins(Operand rd, Operand rn, int index, int size)
+ {
+ WriteInstruction(0x4e001c00u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn);
+ }
+
+ public void Ins(Operand rd, Operand rn, int srcIndex, int dstIndex, int size)
+ {
+ uint imm4 = (uint)srcIndex << size;
+ Debug.Assert((uint)srcIndex < (16u >> size));
+ WriteInstruction(0x6e000400u | (imm4 << 11) | (EncodeIndexSizeImm5(dstIndex, size) << 16), rd, rn);
+ }
+
+ public void Ldaxp(Operand rt, Operand rt2, Operand rn)
+ {
+ WriteInstruction(0x887f8000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rt2);
+ }
+
+ public void Ldaxr(Operand rt, Operand rn)
+ {
+ WriteInstruction(0x085ffc00u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn);
+ }
+
+ public void Ldaxrb(Operand rt, Operand rn)
+ {
+ WriteInstruction(0x085ffc00u, rt, rn);
+ }
+
+ public void Ldaxrh(Operand rt, Operand rn)
+ {
+ WriteInstruction(0x085ffc00u | (1u << 30), rt, rn);
+ }
+
+ public void LdpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x28c00000u, 0x2cc00000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void LdpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x29c00000u, 0x2dc00000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void LdpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x29400000u, 0x2d400000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void Ldr(Operand rt, Operand rn)
+ {
+ if (rn.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = rn.GetMemory();
+
+ if (memOp.Index != default)
+ {
+ Debug.Assert(memOp.Displacement == 0);
+ Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type));
+ LdrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1);
+ }
+ else
+ {
+ LdrRiUn(rt, memOp.BaseAddress, memOp.Displacement);
+ }
+ }
+ else
+ {
+ LdrRiUn(rt, rn, 0);
+ }
+ }
+
+ public void LdrLit(Operand rt, int offset)
+ {
+ uint instruction = 0x18000000u | (EncodeSImm19_2(offset) << 5);
+
+ if (rt.Type == OperandType.I64)
+ {
+ instruction |= 1u << 30;
+ }
+
+ WriteInstruction(instruction, rt);
+ }
+
+ public void LdrRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8400400u, 0x3c400400u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8400c00u, 0x3c400c00u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb9400000u, 0x3d400000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8600800u, 0x3ce00800u, rt.Type);
+ WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift);
+ }
+
+ public void LdrbRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x38400400u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrbRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x38400c00u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrbRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x39400000u | (EncodeUImm12(imm, 0) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrhRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x78400400u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrhRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x78400c00u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrhRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x79400000u | (EncodeUImm12(imm, 1) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void Ldur(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8400000u, 0x3c400000u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void Lsl(Operand rd, Operand rn, Operand rm)
+ {
+ if (rm.Kind == OperandKind.Constant)
+ {
+ int shift = rm.AsInt32();
+ int mask = rd.Type == OperandType.I64 ? 63 : 31;
+ shift &= mask;
+ Ubfm(rd, rn, -shift & mask, mask - shift);
+ }
+ else
+ {
+ Lslv(rd, rn, rm);
+ }
+ }
+
+ public void Lslv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionBitwiseAuto(0x1ac02000u, rd, rn, rm);
+ }
+
+ public void Lsr(Operand rd, Operand rn, Operand rm)
+ {
+ if (rm.Kind == OperandKind.Constant)
+ {
+ int shift = rm.AsInt32();
+ int mask = rd.Type == OperandType.I64 ? 63 : 31;
+ shift &= mask;
+ Ubfm(rd, rn, shift, mask);
+ }
+ else
+ {
+ Lsrv(rd, rn, rm);
+ }
+ }
+
+ public void Lsrv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionBitwiseAuto(0x1ac02400u, rd, rn, rm);
+ }
+
+ public void Madd(Operand rd, Operand rn, Operand rm, Operand ra)
+ {
+ WriteInstructionAuto(0x1b000000u, rd, rn, rm, ra);
+ }
+
+ public void Mul(Operand rd, Operand rn, Operand rm)
+ {
+ Madd(rd, rn, rm, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type));
+ }
+
+ public void Mov(Operand rd, Operand rn)
+ {
+ if (rd.Type.IsInteger())
+ {
+ Orr(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn);
+ }
+ else
+ {
+ OrrVector(rd, rn, rn);
+ }
+ }
+
+ public void MovSp(Operand rd, Operand rn)
+ {
+ if (rd.GetRegister().Index == SpRegister ||
+ rn.GetRegister().Index == SpRegister)
+ {
+ Add(rd, rn, Factory.Const(rd.Type, 0), immForm: true);
+ }
+ else
+ {
+ Mov(rd, rn);
+ }
+ }
+
+ public void Mov(Operand rd, int imm)
+ {
+ Movz(rd, imm, 0);
+ }
+
+ public void Movz(Operand rd, int imm, int hw)
+ {
+ Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
+ WriteInstructionAuto(0x52800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd);
+ }
+
+ public void Movk(Operand rd, int imm, int hw)
+ {
+ Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
+ WriteInstructionAuto(0x72800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd);
+ }
+
+ public void Mrs(Operand rt, uint o0, uint op1, uint crn, uint crm, uint op2)
+ {
+ uint instruction = 0xd5300000u;
+
+ instruction |= (op2 & 7) << 5;
+ instruction |= (crm & 15) << 8;
+ instruction |= (crn & 15) << 12;
+ instruction |= (op1 & 7) << 16;
+ instruction |= (o0 & 1) << 19;
+
+ WriteInstruction(instruction, rt);
+ }
+
+ public void Mvn(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ Orn(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount);
+ }
+
+ public void Neg(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ Sub(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount);
+ }
+
+ public void Orn(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x2a200000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Orr(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x32000000u, 0x2a000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void OrrVector(Operand rd, Operand rn, Operand rm, bool q = true)
+ {
+ WriteSimdInstruction(0x0ea01c00u, rd, rn, rm, q);
+ }
+
+ public void Ret(Operand rn)
+ {
+ WriteUInt32(0xd65f0000u | (EncodeReg(rn) << 5));
+ }
+
+ public void Rev(Operand rd, Operand rn)
+ {
+ uint opc0 = rd.Type == OperandType.I64 ? 1u << 10 : 0u;
+ WriteInstructionAuto(0x5ac00800u | opc0, rd, rn);
+ }
+
+ public void Ror(Operand rd, Operand rn, Operand rm)
+ {
+ if (rm.Kind == OperandKind.Constant)
+ {
+ int shift = rm.AsInt32();
+ int mask = rd.Type == OperandType.I64 ? 63 : 31;
+ shift &= mask;
+ Extr(rd, rn, rn, shift);
+ }
+ else
+ {
+ Rorv(rd, rn, rm);
+ }
+ }
+
+ public void Rorv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionBitwiseAuto(0x1ac02c00u, rd, rn, rm);
+ }
+
+ public void Sbfm(Operand rd, Operand rn, int immr, int imms)
+ {
+ uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+ WriteInstructionAuto(0x13000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn);
+ }
+
+ public void ScvtfScalar(Operand rd, Operand rn)
+ {
+ uint instruction = 0x1e220000u;
+
+ if (rn.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteFPInstructionAuto(instruction, rd, rn);
+ }
+
+ public void Sdiv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionRm16Auto(0x1ac00c00u, rd, rn, rm);
+ }
+
+ public void Smulh(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionRm16(0x9b407c00u, rd, rn, rm);
+ }
+
+ public void Stlxp(Operand rt, Operand rt2, Operand rn, Operand rs)
+ {
+ WriteInstruction(0x88208000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs, rt2);
+ }
+
+ public void Stlxr(Operand rt, Operand rn, Operand rs)
+ {
+ WriteInstructionRm16(0x0800fc00u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs);
+ }
+
+ public void Stlxrb(Operand rt, Operand rn, Operand rs)
+ {
+ WriteInstructionRm16(0x0800fc00u, rt, rn, rs);
+ }
+
+ public void Stlxrh(Operand rt, Operand rn, Operand rs)
+ {
+ WriteInstructionRm16(0x0800fc00u | (1u << 30), rt, rn, rs);
+ }
+
+ public void StpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x28800000u, 0x2c800000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void StpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x29800000u, 0x2d800000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void StpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x29000000u, 0x2d000000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void Str(Operand rt, Operand rn)
+ {
+ if (rn.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = rn.GetMemory();
+
+ if (memOp.Index != default)
+ {
+ Debug.Assert(memOp.Displacement == 0);
+ Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type));
+ StrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1);
+ }
+ else
+ {
+ StrRiUn(rt, memOp.BaseAddress, memOp.Displacement);
+ }
+ }
+ else
+ {
+ StrRiUn(rt, rn, 0);
+ }
+ }
+
+ public void StrRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8000400u, 0x3c000400u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8000c00u, 0x3c000c00u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb9000000u, 0x3d000000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8200800u, 0x3ca00800u, rt.Type);
+ WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift);
+ }
+
+ public void StrbRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x38000400u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrbRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x38000c00u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrbRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x39000000u | (EncodeUImm12(imm, 0) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrhRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x78000400u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrhRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x78000c00u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrhRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x79000000u | (EncodeUImm12(imm, 1) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void Stur(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8000000u, 0x3c000000u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void Sub(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
+ {
+ WriteInstructionAuto(0x4b200000u, rd, rn, rm, extensionType, shiftAmount);
+ }
+
+ public void Sub(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionAuto(0x51000000u, 0x4b000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Subs(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionAuto(0x71000000u, 0x6b000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Sxtb(Operand rd, Operand rn)
+ {
+ Sbfm(rd, rn, 0, 7);
+ }
+
+ public void Sxth(Operand rd, Operand rn)
+ {
+ Sbfm(rd, rn, 0, 15);
+ }
+
+ public void Sxtw(Operand rd, Operand rn)
+ {
+ Sbfm(rd, rn, 0, 31);
+ }
+
+ public void Tst(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ Ands(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Ubfm(Operand rd, Operand rn, int immr, int imms)
+ {
+ uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+ WriteInstructionAuto(0x53000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn);
+ }
+
+ public void UcvtfScalar(Operand rd, Operand rn)
+ {
+ uint instruction = 0x1e230000u;
+
+ if (rn.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteFPInstructionAuto(instruction, rd, rn);
+ }
+
+ public void Udiv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionRm16Auto(0x1ac00800u, rd, rn, rm);
+ }
+
+ public void Umov(Operand rd, Operand rn, int index, int size)
+ {
+ uint q = size == 3 ? 1u << 30 : 0u;
+ WriteInstruction(0x0e003c00u | (EncodeIndexSizeImm5(index, size) << 16) | q, rd, rn);
+ }
+
+ public void Umulh(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionRm16(0x9bc07c00u, rd, rn, rm);
+ }
+
+ public void Uxtb(Operand rd, Operand rn)
+ {
+ Ubfm(rd, rn, 0, 7);
+ }
+
+ public void Uxth(Operand rd, Operand rn)
+ {
+ Ubfm(rd, rn, 0, 15);
+ }
+
+ private void WriteInstructionAuto(
+ uint instI,
+ uint instR,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmShiftType shiftType = ArmShiftType.Lsl,
+ int shiftAmount = 0,
+ bool immForm = false)
+ {
+ if (rm.Kind == OperandKind.Constant && (rm.Value != 0 || immForm))
+ {
+ Debug.Assert(shiftAmount == 0);
+ int imm = rm.AsInt32();
+ Debug.Assert((uint)imm == rm.Value);
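+ // ADD/SUB (immediate) encodes a 12-bit immediate, optionally shifted left by 12;
+ // use the shifted form when the low 12 bits are zero.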
+ if (imm != 0 && (imm & 0xfff) == 0)
+ {
+ instI |= 1 << 22; // sh flag
+ imm >>= 12;
+ }
+ WriteInstructionAuto(instI | (EncodeUImm12(imm, 0) << 10), rd, rn);
+ }
+ else
+ {
+ instR |= EncodeUImm6(shiftAmount) << 10;
+ instR |= (uint)shiftType << 22;
+
+ WriteInstructionRm16Auto(instR, rd, rn, rm);
+ }
+ }
+
+ private void WriteInstructionAuto(
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmExtensionType extensionType,
+ int shiftAmount = 0)
+ {
+ Debug.Assert((uint)shiftAmount <= 4);
+
+ instruction |= (uint)shiftAmount << 10;
+ instruction |= (uint)extensionType << 13;
+
+ WriteInstructionRm16Auto(instruction, rd, rn, rm);
+ }
+
+ private void WriteInstructionBitwiseAuto(
+ uint instI,
+ uint instR,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmShiftType shiftType = ArmShiftType.Lsl,
+ int shiftAmount = 0)
+ {
+ if (rm.Kind == OperandKind.Constant && rm.Value != 0)
+ {
+ Debug.Assert(shiftAmount == 0);
+ bool canEncode = CodeGenCommon.TryEncodeBitMask(rm, out int immN, out int immS, out int immR);
+ Debug.Assert(canEncode);
+ uint instruction = instI | ((uint)immS << 10) | ((uint)immR << 16) | ((uint)immN << 22);
+
+ WriteInstructionAuto(instruction, rd, rn);
+ }
+ else
+ {
+ WriteInstructionBitwiseAuto(instR, rd, rn, rm, shiftType, shiftAmount);
+ }
+ }
+
+ private void WriteInstructionBitwiseAuto(
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmShiftType shiftType = ArmShiftType.Lsl,
+ int shiftAmount = 0)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ instruction |= EncodeUImm6(shiftAmount) << 10;
+ instruction |= (uint)shiftType << 22;
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private void WriteInstructionLdrStrAuto(
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmExtensionType extensionType,
+ bool shift)
+ {
+ if (shift)
+ {
+ instruction |= 1u << 12;
+ }
+
+ instruction |= (uint)extensionType << 13;
+
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= 1u << 30;
+ }
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private void WriteInstructionAuto(uint instruction, Operand rd)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteInstruction(instruction, rd);
+ }
+
+ public void WriteInstructionAuto(uint instruction, Operand rd, Operand rn)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteInstruction(instruction, rd, rn);
+ }
+
+ private void WriteInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteInstruction(instruction, rd, rn, rm, ra);
+ }
+
+ public void WriteInstruction(uint instruction, Operand rd)
+ {
+ WriteUInt32(instruction | EncodeReg(rd));
+ }
+
+ public void WriteInstruction(uint instruction, Operand rd, Operand rn)
+ {
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+
+ public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 10));
+ }
+
+ public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
+ {
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(ra) << 10) | (EncodeReg(rm) << 16));
+ }
+
+ private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn)
+ {
+ if (rd.Type == OperandType.FP64)
+ {
+ instruction |= 1u << 22;
+ }
+
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+
+ private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+ if (rd.Type == OperandType.FP64)
+ {
+ instruction |= 1u << 22;
+ }
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private void WriteSimdInstruction(uint instruction, Operand rd, Operand rn, Operand rm, bool q = true)
+ {
+ if (q)
+ {
+ instruction |= 1u << 30;
+ }
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private void WriteInstructionRm16Auto(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ public void WriteInstructionRm16(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16));
+ }
+
+ public void WriteInstructionRm16NoRet(uint instruction, Operand rn, Operand rm)
+ {
+ WriteUInt32(instruction | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16));
+ }
+
+ private static uint GetLdpStpInstruction(uint intInst, uint vecInst, int imm, OperandType type)
+ {
+ uint instruction;
+ int scale;
+
+ if (type.IsInteger())
+ {
+ instruction = intInst;
+
+ if (type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ scale = 3;
+ }
+ else
+ {
+ scale = 2;
+ }
+ }
+ else
+ {
+ int opc = type switch
+ {
+ OperandType.FP32 => 0,
+ OperandType.FP64 => 1,
+ _ => 2
+ };
+
+ instruction = vecInst | ((uint)opc << 30);
+ scale = 2 + opc;
+ }
+
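+ // The 7-bit signed immediate of LDP/STP is scaled by the access size, so the encoded field is imm >> scale.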
+ instruction |= (EncodeSImm7(imm, scale) << 15);
+
+ return instruction;
+ }
+
+ private static uint GetLdrStrInstruction(uint intInst, uint vecInst, OperandType type)
+ {
+ uint instruction;
+
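+ // Bits 30-31 hold the size field; for vector accesses, bit 23 (opc[1]) marks the 128-bit form.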
+ if (type.IsInteger())
+ {
+ instruction = intInst;
+
+ if (type == OperandType.I64)
+ {
+ instruction |= 1 << 30;
+ }
+ }
+ else
+ {
+ instruction = vecInst;
+
+ if (type == OperandType.V128)
+ {
+ instruction |= 1u << 23;
+ }
+ else
+ {
+ instruction |= type == OperandType.FP32 ? 2u << 30 : 3u << 30;
+ }
+ }
+
+ return instruction;
+ }
+
+ private static uint EncodeIndexSizeImm5(int index, int size)
+ {
+ Debug.Assert((uint)size < 4);
+ Debug.Assert((uint)index < (16u >> size), $"Invalid index {index} and size {size} combination.");
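+ // imm5 places a single 1 at bit 'size' to select the element width; the index occupies the bits above it.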
+ return ((uint)index << (size + 1)) | (1u << size);
+ }
+
+ private static uint EncodeSImm7(int value, int scale)
+ {
+ uint imm = (uint)(value >> scale) & 0x7f;
+ Debug.Assert(((int)imm << 25) >> (25 - scale) == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
+ return imm;
+ }
+
+ private static uint EncodeSImm9(int value)
+ {
+ uint imm = (uint)value & 0x1ff;
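+ // Shifting the 9-bit field up to bit 31 and arithmetically back down sign-extends it,
+ // so the assert verifies that the value round-trips.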
+ Debug.Assert(((int)imm << 23) >> 23 == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeSImm19_2(int value)
+ {
+ uint imm = (uint)(value >> 2) & 0x7ffff;
+ Debug.Assert(((int)imm << 13) >> 11 == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeSImm26_2(int value)
+ {
+ uint imm = (uint)(value >> 2) & 0x3ffffff;
+ Debug.Assert(((int)imm << 6) >> 4 == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeUImm4(int value)
+ {
+ uint imm = (uint)value & 0xf;
+ Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeUImm6(int value)
+ {
+ uint imm = (uint)value & 0x3f;
+ Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeUImm12(int value, OperandType type)
+ {
+ return EncodeUImm12(value, GetScaleForType(type));
+ }
+
+ private static uint EncodeUImm12(int value, int scale)
+ {
+ uint imm = (uint)(value >> scale) & 0xfff;
+ Debug.Assert((int)imm << scale == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
+ return imm;
+ }
+
+ private static uint EncodeUImm16(int value)
+ {
+ uint imm = (uint)value & 0xffff;
+ Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeReg(Operand reg)
+ {
+ if (reg.Kind == OperandKind.Constant && reg.Value == 0)
+ {
+ return ZrRegister;
+ }
+
+ Debug.Assert(reg.Kind == OperandKind.Register);
+ uint regIndex = (uint)reg.GetRegister().Index;
+ Debug.Assert(regIndex < 32);
+ return regIndex;
+ }
+
+ public static int GetScaleForType(OperandType type)
+ {
+ return type switch
+ {
+ OperandType.I32 => 2,
+ OperandType.I64 => 3,
+ OperandType.FP32 => 2,
+ OperandType.FP64 => 3,
+ OperandType.V128 => 4,
+ _ => throw new ArgumentException($"Invalid type {type}.")
+ };
+ }
+
+ private void WriteInt16(short value)
+ {
+ WriteUInt16((ushort)value);
+ }
+
+ private void WriteInt32(int value)
+ {
+ WriteUInt32((uint)value);
+ }
+
+ private void WriteByte(byte value)
+ {
+ _stream.WriteByte(value);
+ }
+
+ private void WriteUInt16(ushort value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ }
+
+ private void WriteUInt32(uint value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ _stream.WriteByte((byte)(value >> 16));
+ _stream.WriteByte((byte)(value >> 24));
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs b/src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs
new file mode 100644
index 00000000..fda8d786
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs
@@ -0,0 +1,96 @@
+using System;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class CallingConvention
+ {
+ private const int RegistersMask = unchecked((int)0xffffffff);
+
+ // Some of these registers have specific roles and can't be used as general purpose registers.
+ // X18 - Reserved for platform specific usage.
+ // X29 - Frame pointer.
+ // X30 - Return address.
+ // X31 - Not an actual register; it maps to SP in some contexts and to ZR in others.
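+ // With ReservedRegister = 17, ReservedRegsMask works out to 0xE0060000,
+ // so GetIntAvailableRegisters() returns 0x1FF9FFFF.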
+ private const int ReservedRegsMask = (1 << CodeGenCommon.ReservedRegister) | (1 << 18) | (1 << 29) | (1 << 30) | (1 << 31);
+
+ public static int GetIntAvailableRegisters()
+ {
+ return RegistersMask & ~ReservedRegsMask;
+ }
+
+ public static int GetVecAvailableRegisters()
+ {
+ return RegistersMask;
+ }
+
+ public static int GetIntCallerSavedRegisters()
+ {
+ return (GetIntCalleeSavedRegisters() ^ RegistersMask) & ~ReservedRegsMask;
+ }
+
+ public static int GetFpCallerSavedRegisters()
+ {
+ return GetFpCalleeSavedRegisters() ^ RegistersMask;
+ }
+
+ public static int GetVecCallerSavedRegisters()
+ {
+ return GetVecCalleeSavedRegisters() ^ RegistersMask;
+ }
+
+ public static int GetIntCalleeSavedRegisters()
+ {
+ return 0x1ff80000; // X19 to X28
+ }
+
+ public static int GetFpCalleeSavedRegisters()
+ {
+ return 0xff00; // D8 to D15
+ }
+
+ public static int GetVecCalleeSavedRegisters()
+ {
+ return 0;
+ }
+
+ public static int GetArgumentsOnRegsCount()
+ {
+ return 8;
+ }
+
+ public static int GetIntArgumentRegister(int index)
+ {
+ if ((uint)index < (uint)GetArgumentsOnRegsCount())
+ {
+ return index;
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ public static int GetVecArgumentRegister(int index)
+ {
+ if ((uint)index < (uint)GetArgumentsOnRegsCount())
+ {
+ return index;
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ public static int GetIntReturnRegister()
+ {
+ return 0;
+ }
+
+ public static int GetIntReturnRegisterHigh()
+ {
+ return 1;
+ }
+
+ public static int GetVecReturnRegister()
+ {
+ return 0;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
new file mode 100644
index 00000000..8d1e597b
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
@@ -0,0 +1,91 @@
+using ARMeilleure.IntermediateRepresentation;
+using System.Numerics;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class CodeGenCommon
+ {
+ public const int TcAddressRegister = 8;
+ public const int ReservedRegister = 17;
+
+ public static bool ConstFitsOnSImm7(int value, int scale)
+ {
+ return (((value >> scale) << 25) >> (25 - scale)) == value;
+ }
+
+ public static bool ConstFitsOnSImm9(int value)
+ {
+ return ((value << 23) >> 23) == value;
+ }
+
+ public static bool ConstFitsOnUImm12(int value)
+ {
+ return (value & 0xfff) == value;
+ }
+
+ public static bool ConstFitsOnUImm12(int value, OperandType type)
+ {
+ int scale = Assembler.GetScaleForType(type);
+ return (((value >> scale) & 0xfff) << scale) == value;
+ }
+
+ public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR)
+ {
+ return TryEncodeBitMask(operand.Type, operand.Value, out immN, out immS, out immR);
+ }
+
+ public static bool TryEncodeBitMask(OperandType type, ulong value, out int immN, out int immS, out int immR)
+ {
+ if (type == OperandType.I32)
+ {
+ value |= value << 32;
+ }
+
+ return TryEncodeBitMask(value, out immN, out immS, out immR);
+ }
+
+ public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR)
+ {
+ // Some special values also can't be encoded:
+ // 0 can't be encoded because we need to subtract 1 from the ones count (which would become negative for 0).
+ // A value with all bits set can't be encoded because the spec reserves it:
+ // ANDing any value with all ones yields the value itself, so it's effectively a no-op.
+ // ORing any value with all ones yields all ones, so one can just use MOV.
+ // XORing any value with all ones yields its inverse, so one can just use MVN.
+ if (value == 0 || value == ulong.MaxValue)
+ {
+ immN = 0;
+ immS = 0;
+ immR = 0;
+
+ return false;
+ }
+
+ // Normalize the value by rotating it so that the LSB is 1. This ensures we get a complete
+ // element that has not been cut in half across the word boundary.
+ int rotation = BitOperations.TrailingZeroCount(value & (value + 1));
+ ulong rotatedValue = ulong.RotateRight(value, rotation);
+
+ // Now that we have a complete element at the bottom with the LSB set, determine the
+ // element size and the number of ones in the element.
+ int elementSize = BitOperations.TrailingZeroCount(rotatedValue & (rotatedValue + 1));
+ int onesInElement = BitOperations.TrailingZeroCount(~rotatedValue);
+
+ // Check that the value is repeating; this also ensures the element size is a power of two.
+ if (ulong.RotateRight(value, elementSize) != value)
+ {
+ immN = 0;
+ immS = 0;
+ immR = 0;
+
+ return false;
+ }
+
+ immN = (elementSize >> 6) & 1;
+ immS = (((~elementSize + 1) << 1) | (onesInElement - 1)) & 0x3f;
+ immR = (elementSize - rotation) & (elementSize - 1);
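+
+ // Worked example: value = 0x00ff00ff00ff00ff gives rotation = 16, elementSize = 16 and
+ // onesInElement = 8, which encodes as immN = 0, immS = 0b100111 and immR = 0.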
+
+ return true;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
new file mode 100644
index 00000000..0dd5355f
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
@@ -0,0 +1,287 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using Ryujinx.Common.Memory;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ class CodeGenContext
+ {
+ private const int BccInstLength = 4;
+ private const int CbnzInstLength = 4;
+ private const int LdrLitInstLength = 4;
+
+ private Stream _stream;
+
+ public int StreamOffset => (int)_stream.Length;
+
+ public AllocationResult AllocResult { get; }
+
+ public Assembler Assembler { get; }
+
+ public BasicBlock CurrBlock { get; private set; }
+
+ public bool HasCall { get; }
+
+ public int CallArgsRegionSize { get; }
+ public int FpLrSaveRegionSize { get; }
+
+ private readonly Dictionary<BasicBlock, long> _visitedBlocks;
+ private readonly Dictionary<BasicBlock, List<(ArmCondition Condition, long BranchPos)>> _pendingBranches;
+
+ private struct ConstantPoolEntry
+ {
+ public readonly int Offset;
+ public readonly Symbol Symbol;
+ public readonly List<(Operand, int)> LdrOffsets;
+
+ public ConstantPoolEntry(int offset, Symbol symbol)
+ {
+ Offset = offset;
+ Symbol = symbol;
+ LdrOffsets = new List<(Operand, int)>();
+ }
+ }
+
+ private readonly Dictionary<ulong, ConstantPoolEntry> _constantPool;
+
+ private bool _constantPoolWritten;
+ private long _constantPoolOffset;
+
+ private ArmCondition _jNearCondition;
+ private Operand _jNearValue;
+
+ private long _jNearPosition;
+
+ private readonly bool _relocatable;
+
+ public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
+ {
+ _stream = MemoryStreamManager.Shared.GetStream();
+
+ AllocResult = allocResult;
+
+ Assembler = new Assembler(_stream);
+
+ bool hasCall = maxCallArgs >= 0;
+
+ HasCall = hasCall;
+
+ if (maxCallArgs < 0)
+ {
+ maxCallArgs = 0;
+ }
+
+ CallArgsRegionSize = maxCallArgs * 16;
+ FpLrSaveRegionSize = hasCall ? 16 : 0;
+
+ _visitedBlocks = new Dictionary<BasicBlock, long>();
+ _pendingBranches = new Dictionary<BasicBlock, List<(ArmCondition, long)>>();
+ _constantPool = new Dictionary<ulong, ConstantPoolEntry>();
+
+ _relocatable = relocatable;
+ }
+
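+ // Branches to blocks that have not been visited yet are emitted as 4-byte placeholders
+ // and patched in EnterBlock once the target offset is known.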
+ public void EnterBlock(BasicBlock block)
+ {
+ CurrBlock = block;
+
+ long target = _stream.Position;
+
+ if (_pendingBranches.TryGetValue(block, out var list))
+ {
+ foreach (var tuple in list)
+ {
+ _stream.Seek(tuple.BranchPos, SeekOrigin.Begin);
+ WriteBranch(tuple.Condition, target);
+ }
+
+ _stream.Seek(target, SeekOrigin.Begin);
+ _pendingBranches.Remove(block);
+ }
+
+ _visitedBlocks.Add(block, target);
+ }
+
+ public void JumpTo(BasicBlock target)
+ {
+ JumpTo(ArmCondition.Al, target);
+ }
+
+ public void JumpTo(ArmCondition condition, BasicBlock target)
+ {
+ if (_visitedBlocks.TryGetValue(target, out long offset))
+ {
+ WriteBranch(condition, offset);
+ }
+ else
+ {
+ if (!_pendingBranches.TryGetValue(target, out var list))
+ {
+ list = new List<(ArmCondition, long)>();
+ _pendingBranches.Add(target, list);
+ }
+
+ list.Add((condition, _stream.Position));
+
+ _stream.Seek(BccInstLength, SeekOrigin.Current);
+ }
+ }
+
+ private void WriteBranch(ArmCondition condition, long to)
+ {
+ int imm = checked((int)(to - _stream.Position));
+
+ if (condition != ArmCondition.Al)
+ {
+ Assembler.B(condition, imm);
+ }
+ else
+ {
+ Assembler.B(imm);
+ }
+ }
+
+ public void JumpToNear(ArmCondition condition)
+ {
+ _jNearCondition = condition;
+ _jNearPosition = _stream.Position;
+
+ _stream.Seek(BccInstLength, SeekOrigin.Current);
+ }
+
+ public void JumpToNearIfNotZero(Operand value)
+ {
+ _jNearValue = value;
+ _jNearPosition = _stream.Position;
+
+ _stream.Seek(CbnzInstLength, SeekOrigin.Current);
+ }
+
+ public void JumpHere()
+ {
+ long currentPosition = _stream.Position;
+ long offset = currentPosition - _jNearPosition;
+
+ _stream.Seek(_jNearPosition, SeekOrigin.Begin);
+
+ if (_jNearValue != default)
+ {
+ Assembler.Cbnz(_jNearValue, checked((int)offset));
+ _jNearValue = default;
+ }
+ else
+ {
+ Assembler.B(_jNearCondition, checked((int)offset));
+ }
+
+ _stream.Seek(currentPosition, SeekOrigin.Begin);
+ }
+
+ public void ReserveRelocatableConstant(Operand rt, Symbol symbol, ulong value)
+ {
+ if (!_constantPool.TryGetValue(value, out ConstantPoolEntry cpe))
+ {
+ cpe = new ConstantPoolEntry(_constantPool.Count * sizeof(ulong), symbol);
+ _constantPool.Add(value, cpe);
+ }
+
+ cpe.LdrOffsets.Add((rt, (int)_stream.Position));
+ _stream.Seek(LdrLitInstLength, SeekOrigin.Current);
+ }
+
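+ // The constant pool is appended after the code, with one 8-byte slot per distinct constant;
+ // every recorded LDR site is patched with a PC-relative literal load targeting its slot.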
+ private long WriteConstantPool()
+ {
+ if (_constantPoolWritten)
+ {
+ return _constantPoolOffset;
+ }
+
+ long constantPoolBaseOffset = _stream.Position;
+
+ foreach (ulong value in _constantPool.Keys)
+ {
+ WriteUInt64(value);
+ }
+
+ foreach (ConstantPoolEntry cpe in _constantPool.Values)
+ {
+ foreach ((Operand rt, int ldrOffset) in cpe.LdrOffsets)
+ {
+ _stream.Seek(ldrOffset, SeekOrigin.Begin);
+
+ int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
+ int pcRelativeOffset = absoluteOffset - ldrOffset;
+
+ Assembler.LdrLit(rt, pcRelativeOffset);
+ }
+ }
+
+ _stream.Seek(constantPoolBaseOffset + _constantPool.Count * sizeof(ulong), SeekOrigin.Begin);
+
+ _constantPoolOffset = constantPoolBaseOffset;
+ _constantPoolWritten = true;
+
+ return constantPoolBaseOffset;
+ }
+
+ public (byte[], RelocInfo) GetCode()
+ {
+ long constantPoolBaseOffset = WriteConstantPool();
+
+ byte[] code = new byte[_stream.Length];
+
+ long originalPosition = _stream.Position;
+
+ _stream.Seek(0, SeekOrigin.Begin);
+ _stream.Read(code, 0, code.Length);
+ _stream.Seek(originalPosition, SeekOrigin.Begin);
+
+ RelocInfo relocInfo;
+
+ if (_relocatable)
+ {
+ RelocEntry[] relocs = new RelocEntry[_constantPool.Count];
+
+ int index = 0;
+
+ foreach (ConstantPoolEntry cpe in _constantPool.Values)
+ {
+ if (cpe.Symbol.Type != SymbolType.None)
+ {
+ int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
+ relocs[index++] = new RelocEntry(absoluteOffset, cpe.Symbol);
+ }
+ }
+
+ if (index != relocs.Length)
+ {
+ Array.Resize(ref relocs, index);
+ }
+
+ relocInfo = new RelocInfo(relocs);
+ }
+ else
+ {
+ relocInfo = new RelocInfo(Array.Empty<RelocEntry>());
+ }
+
+ return (code, relocInfo);
+ }
+
+ private void WriteUInt64(ulong value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ _stream.WriteByte((byte)(value >> 16));
+ _stream.WriteByte((byte)(value >> 24));
+ _stream.WriteByte((byte)(value >> 32));
+ _stream.WriteByte((byte)(value >> 40));
+ _stream.WriteByte((byte)(value >> 48));
+ _stream.WriteByte((byte)(value >> 56));
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
new file mode 100644
index 00000000..fc4fa976
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
@@ -0,0 +1,1580 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+
+using static ARMeilleure.IntermediateRepresentation.Operand;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class CodeGenerator
+ {
+ private const int DWordScale = 3;
+
+ private const int RegistersCount = 32;
+
+ private const int FpRegister = 29;
+ private const int LrRegister = 30;
+ private const int SpRegister = 31;
+ private const int ZrRegister = 31;
+
+ private enum AccessSize
+ {
+ Byte,
+ Hword,
+ Auto
+ }
+
+ private static readonly Action<CodeGenContext, Operation>[] _instTable;
+
+ static CodeGenerator()
+ {
+ _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))];
+
+ Add(Instruction.Add, GenerateAdd);
+ Add(Instruction.BitwiseAnd, GenerateBitwiseAnd);
+ Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
+ Add(Instruction.BitwiseNot, GenerateBitwiseNot);
+ Add(Instruction.BitwiseOr, GenerateBitwiseOr);
+ Add(Instruction.BranchIf, GenerateBranchIf);
+ Add(Instruction.ByteSwap, GenerateByteSwap);
+ Add(Instruction.Call, GenerateCall);
+ //Add(Instruction.Clobber, GenerateClobber);
+ Add(Instruction.Compare, GenerateCompare);
+ Add(Instruction.CompareAndSwap, GenerateCompareAndSwap);
+ Add(Instruction.CompareAndSwap16, GenerateCompareAndSwap16);
+ Add(Instruction.CompareAndSwap8, GenerateCompareAndSwap8);
+ Add(Instruction.ConditionalSelect, GenerateConditionalSelect);
+ Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32);
+ Add(Instruction.ConvertToFP, GenerateConvertToFP);
+ Add(Instruction.ConvertToFPUI, GenerateConvertToFPUI);
+ Add(Instruction.Copy, GenerateCopy);
+ Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
+ Add(Instruction.Divide, GenerateDivide);
+ Add(Instruction.DivideUI, GenerateDivideUI);
+ Add(Instruction.Fill, GenerateFill);
+ Add(Instruction.Load, GenerateLoad);
+ Add(Instruction.Load16, GenerateLoad16);
+ Add(Instruction.Load8, GenerateLoad8);
+ Add(Instruction.MemoryBarrier, GenerateMemoryBarrier);
+ Add(Instruction.Multiply, GenerateMultiply);
+ Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI);
+ Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI);
+ Add(Instruction.Negate, GenerateNegate);
+ Add(Instruction.Return, GenerateReturn);
+ Add(Instruction.RotateRight, GenerateRotateRight);
+ Add(Instruction.ShiftLeft, GenerateShiftLeft);
+ Add(Instruction.ShiftRightSI, GenerateShiftRightSI);
+ Add(Instruction.ShiftRightUI, GenerateShiftRightUI);
+ Add(Instruction.SignExtend16, GenerateSignExtend16);
+ Add(Instruction.SignExtend32, GenerateSignExtend32);
+ Add(Instruction.SignExtend8, GenerateSignExtend8);
+ Add(Instruction.Spill, GenerateSpill);
+ Add(Instruction.SpillArg, GenerateSpillArg);
+ Add(Instruction.StackAlloc, GenerateStackAlloc);
+ Add(Instruction.Store, GenerateStore);
+ Add(Instruction.Store16, GenerateStore16);
+ Add(Instruction.Store8, GenerateStore8);
+ Add(Instruction.Subtract, GenerateSubtract);
+ Add(Instruction.Tailcall, GenerateTailcall);
+ Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar);
+ Add(Instruction.VectorExtract, GenerateVectorExtract);
+ Add(Instruction.VectorExtract16, GenerateVectorExtract16);
+ Add(Instruction.VectorExtract8, GenerateVectorExtract8);
+ Add(Instruction.VectorInsert, GenerateVectorInsert);
+ Add(Instruction.VectorInsert16, GenerateVectorInsert16);
+ Add(Instruction.VectorInsert8, GenerateVectorInsert8);
+ Add(Instruction.VectorOne, GenerateVectorOne);
+ Add(Instruction.VectorZero, GenerateVectorZero);
+ Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64);
+ Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96);
+ Add(Instruction.ZeroExtend16, GenerateZeroExtend16);
+ Add(Instruction.ZeroExtend32, GenerateZeroExtend32);
+ Add(Instruction.ZeroExtend8, GenerateZeroExtend8);
+
+ static void Add(Instruction inst, Action<CodeGenContext, Operation> func)
+ {
+ _instTable[(int)inst] = func;
+ }
+ }
+
+ public static CompiledFunction Generate(CompilerContext cctx)
+ {
+ ControlFlowGraph cfg = cctx.Cfg;
+
+ Logger.StartPass(PassName.Optimization);
+
+ if (cctx.Options.HasFlag(CompilerOptions.Optimize))
+ {
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Optimizer.RunPass(cfg);
+ }
+
+ BlockPlacement.RunPass(cfg);
+ }
+
+ Arm64Optimizer.RunPass(cfg);
+
+ Logger.EndPass(PassName.Optimization, cfg);
+
+ Logger.StartPass(PassName.PreAllocation);
+
+ StackAllocator stackAlloc = new();
+
+ PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs);
+
+ Logger.EndPass(PassName.PreAllocation, cfg);
+
+ Logger.StartPass(PassName.RegisterAllocation);
+
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Ssa.Deconstruct(cfg);
+ }
+
+ IRegisterAllocator regAlloc;
+
+ if (cctx.Options.HasFlag(CompilerOptions.Lsra))
+ {
+ regAlloc = new LinearScanAllocator();
+ }
+ else
+ {
+ regAlloc = new HybridAllocator();
+ }
+
+ RegisterMasks regMasks = new(
+ CallingConvention.GetIntAvailableRegisters(),
+ CallingConvention.GetVecAvailableRegisters(),
+ CallingConvention.GetIntCallerSavedRegisters(),
+ CallingConvention.GetVecCallerSavedRegisters(),
+ CallingConvention.GetIntCalleeSavedRegisters(),
+ CallingConvention.GetVecCalleeSavedRegisters(),
+ RegistersCount);
+
+ AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
+
+ Logger.EndPass(PassName.RegisterAllocation, cfg);
+
+ Logger.StartPass(PassName.CodeGeneration);
+
+ //Console.Error.WriteLine(IRDumper.GetDump(cfg));
+
+ bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
+
+ CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);
+
+ UnwindInfo unwindInfo = WritePrologue(context);
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ context.EnterBlock(block);
+
+ for (Operation node = block.Operations.First; node != default;)
+ {
+ node = GenerateOperation(context, node);
+ }
+
+ if (block.SuccessorsCount == 0)
+ {
+ // The only blocks which can have 0 successors are exit blocks.
+ Operation last = block.Operations.Last;
+
+ Debug.Assert(last.Instruction == Instruction.Tailcall ||
+ last.Instruction == Instruction.Return);
+ }
+ else
+ {
+ BasicBlock succ = block.GetSuccessor(0);
+
+ if (succ != block.ListNext)
+ {
+ context.JumpTo(succ);
+ }
+ }
+ }
+
+ (byte[] code, RelocInfo relocInfo) = context.GetCode();
+
+ Logger.EndPass(PassName.CodeGeneration);
+
+ return new CompiledFunction(code, unwindInfo, relocInfo);
+ }
+
+ private static Operation GenerateOperation(CodeGenContext context, Operation operation)
+ {
+ if (operation.Instruction == Instruction.Extended)
+ {
+ CodeGeneratorIntrinsic.GenerateOperation(context, operation);
+ }
+ else
+ {
+ if (IsLoadOrStore(operation) &&
+ operation.ListNext != default &&
+ operation.ListNext.Instruction == operation.Instruction &&
+ TryPairMemoryOp(context, operation, operation.ListNext))
+ {
+ // Skip next operation if we managed to pair them.
+ return operation.ListNext.ListNext;
+ }
+
+ Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction];
+
+ if (func != null)
+ {
+ func(context, operation);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
+ }
+ }
+
+ return operation.ListNext;
+ }
+
+ private static void GenerateAdd(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ // ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Add(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.FaddScalar(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.And(dest, src1, src2);
+ }
+
+ private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Eor(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.EorVector(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Mvn(dest, source);
+ }
+
+ private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Orr(dest, src1, src2);
+ }
+
+ private static void GenerateBranchIf(CodeGenContext context, Operation operation)
+ {
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var cond = ((Comparison)comp.AsInt32()).ToArmCondition();
+
+ GenerateCompareCommon(context, operation);
+
+ context.JumpTo(cond, context.CurrBlock.GetSuccessor(1));
+ }
+
+ private static void GenerateByteSwap(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Rev(dest, source);
+ }
+
+ private static void GenerateCall(CodeGenContext context, Operation operation)
+ {
+ context.Assembler.Blr(operation.GetSource(0));
+ }
+
+ private static void GenerateCompare(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(dest.Type == OperandType.I32);
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var cond = ((Comparison)comp.AsInt32()).ToArmCondition();
+
+ GenerateCompareCommon(context, operation);
+
+ context.Assembler.Cset(dest, cond);
+ }
+
+ private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation)
+ {
+ if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3.
+ {
+ Operand actualLow = operation.GetDestination(0);
+ Operand actualHigh = operation.GetDestination(1);
+ Operand temp0 = operation.GetDestination(2);
+ Operand temp1 = operation.GetDestination(3);
+ Operand address = operation.GetSource(0);
+ Operand expectedLow = operation.GetSource(1);
+ Operand expectedHigh = operation.GetSource(2);
+ Operand desiredLow = operation.GetSource(3);
+ Operand desiredHigh = operation.GetSource(4);
+
+ GenerateAtomicDcas(
+ context,
+ address,
+ expectedLow,
+ expectedHigh,
+ desiredLow,
+ desiredHigh,
+ actualLow,
+ actualHigh,
+ temp0,
+ temp1);
+ }
+ else
+ {
+ Operand actual = operation.GetDestination(0);
+ Operand result = operation.GetDestination(1);
+ Operand address = operation.GetSource(0);
+ Operand expected = operation.GetSource(1);
+ Operand desired = operation.GetSource(2);
+
+ GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Auto);
+ }
+ }
+
+ private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation)
+ {
+ Operand actual = operation.GetDestination(0);
+ Operand result = operation.GetDestination(1);
+ Operand address = operation.GetSource(0);
+ Operand expected = operation.GetSource(1);
+ Operand desired = operation.GetSource(2);
+
+ GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Hword);
+ }
+
+ private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation)
+ {
+ Operand actual = operation.GetDestination(0);
+ Operand result = operation.GetDestination(1);
+ Operand address = operation.GetSource(0);
+ Operand expected = operation.GetSource(1);
+ Operand desired = operation.GetSource(2);
+
+ GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Byte);
+ }
+
+ private static void GenerateCompareCommon(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(src1, src2);
+
+ Debug.Assert(src1.Type.IsInteger());
+
+ context.Assembler.Cmp(src1, src2);
+ }
+
+ private static void GenerateConditionalSelect(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(dest, src2, src3);
+
+ Debug.Assert(dest.Type.IsInteger());
+ Debug.Assert(src1.Type == OperandType.I32);
+
+ context.Assembler.Cmp (src1, Const(src1.Type, 0));
+ context.Assembler.Csel(dest, src2, src3, ArmCondition.Ne);
+ }
+
+ private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64);
+
+ context.Assembler.Mov(dest, Register(source, OperandType.I32));
+ }
+
+ private static void GenerateConvertToFP(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64);
+ Debug.Assert(dest.Type != source.Type);
+ Debug.Assert(source.Type != OperandType.V128);
+
+ if (source.Type.IsInteger())
+ {
+ context.Assembler.ScvtfScalar(dest, source);
+ }
+ else
+ {
+ context.Assembler.FcvtScalar(dest, source);
+ }
+ }
+
+ private static void GenerateConvertToFPUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64);
+ Debug.Assert(dest.Type != source.Type);
+ Debug.Assert(source.Type.IsInteger());
+
+ context.Assembler.UcvtfScalar(dest, source);
+ }
+
+ private static void GenerateCopy(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant);
+
+ // Moves to the same register are useless.
+ if (dest.Kind == source.Kind && dest.Value == source.Value)
+ {
+ return;
+ }
+
+ if (dest.Kind == OperandKind.Register && source.Kind == OperandKind.Constant)
+ {
+ if (source.Relocatable)
+ {
+ context.ReserveRelocatableConstant(dest, source.Symbol, source.Value);
+ }
+ else
+ {
+ GenerateConstantCopy(context, dest, source.Value);
+ }
+ }
+ else
+ {
+ context.Assembler.Mov(dest, source);
+ }
+ }
+
+ private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Clz(dest, source);
+ }
+
+ private static void GenerateDivide(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand dividend = operation.GetSource(0);
+ Operand divisor = operation.GetSource(1);
+
+ ValidateBinOp(dest, dividend, divisor);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Sdiv(dest, dividend, divisor);
+ }
+ else
+ {
+ context.Assembler.FdivScalar(dest, dividend, divisor);
+ }
+ }
+
+ private static void GenerateDivideUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand dividend = operation.GetSource(0);
+ Operand divisor = operation.GetSource(1);
+
+ ValidateBinOp(dest, dividend, divisor);
+
+ context.Assembler.Udiv(dest, dividend, divisor);
+ }
+
+ private static void GenerateLoad(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = operation.GetSource(0);
+
+ context.Assembler.Ldr(value, address);
+ }
+
+ private static void GenerateLoad16(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = operation.GetSource(0);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.LdrhRiUn(value, address, 0);
+ }
+
+ private static void GenerateLoad8(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = operation.GetSource(0);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.LdrbRiUn(value, address, 0);
+ }
+
+ private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation)
+ {
+ context.Assembler.Dmb(0xf);
+ }
+
+ private static void GenerateMultiply(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Mul(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.FmulScalar(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1, src2);
+
+ Debug.Assert(dest.Type == OperandType.I64);
+
+ context.Assembler.Smulh(dest, src1, src2);
+ }
+
+ private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1, src2);
+
+ Debug.Assert(dest.Type == OperandType.I64);
+
+ context.Assembler.Umulh(dest, src1, src2);
+ }
+
+ private static void GenerateNegate(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Neg(dest, source);
+ }
+ else
+ {
+ context.Assembler.FnegScalar(dest, source);
+ }
+ }
+
+ private static void GenerateLoad(CodeGenContext context, Operand value, Operand address, int offset)
+ {
+ if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type))
+ {
+ context.Assembler.LdrRiUn(value, address, offset);
+ }
+ else if (CodeGenCommon.ConstFitsOnSImm9(offset))
+ {
+ context.Assembler.Ldur(value, address, offset);
+ }
+ else
+ {
+ Operand tempAddress = Register(CodeGenCommon.ReservedRegister);
+ GenerateConstantCopy(context, tempAddress, (ulong)offset);
+ context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input.
+ context.Assembler.LdrRiUn(value, tempAddress, 0);
+ }
+ }
+
+ private static void GenerateReturn(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Ret(Register(LrRegister));
+ }
+
+ private static void GenerateRotateRight(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Ror(dest, src1, src2);
+ }
+
+ private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Lsl(dest, src1, src2);
+ }
+
+ private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Asr(dest, src1, src2);
+ }
+
+ private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Lsr(dest, src1, src2);
+ }
+
+ private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Sxth(dest, source);
+ }
+
+ private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Sxtw(dest, source);
+ }
+
+ private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Sxtb(dest, source);
+ }
+
+ private static void GenerateFill(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize;
+
+ GenerateLoad(context, dest, Register(SpRegister), offs);
+ }
+
+ private static void GenerateStore(CodeGenContext context, Operand value, Operand address, int offset)
+ {
+ if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type))
+ {
+ context.Assembler.StrRiUn(value, address, offset);
+ }
+ else if (CodeGenCommon.ConstFitsOnSImm9(offset))
+ {
+ context.Assembler.Stur(value, address, offset);
+ }
+ else
+ {
+ Operand tempAddress = Register(CodeGenCommon.ReservedRegister);
+ GenerateConstantCopy(context, tempAddress, (ulong)offset);
+ context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input.
+ context.Assembler.StrRiUn(value, tempAddress, 0);
+ }
+ }
+
+ private static void GenerateSpill(CodeGenContext context, Operation operation)
+ {
+ GenerateSpill(context, operation, context.CallArgsRegionSize + context.FpLrSaveRegionSize);
+ }
+
+ private static void GenerateSpillArg(CodeGenContext context, Operation operation)
+ {
+ GenerateSpill(context, operation, 0);
+ }
+
+ private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize;
+
+ context.Assembler.Add(dest, Register(SpRegister), Const(dest.Type, offs));
+ }
+
+ private static void GenerateStore(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = operation.GetSource(0);
+
+ context.Assembler.Str(value, address);
+ }
+
+ private static void GenerateStore16(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = operation.GetSource(0);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.StrhRiUn(value, address, 0);
+ }
+
+ private static void GenerateStore8(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = operation.GetSource(0);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.StrbRiUn(value, address, 0);
+ }
+
+ private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
+ {
+ Operand offset = operation.GetSource(0);
+ Operand source = operation.GetSource(1);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + baseOffset;
+
+ GenerateStore(context, source, Register(SpRegister), offs);
+ }
+
+ private static void GenerateSubtract(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ // ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Sub(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.FsubScalar(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateTailcall(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Br(operation.GetSource(0));
+ }
+
+ private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ if (dest != default)
+ {
+ Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger());
+
+ OperandType destType = source.Type == OperandType.I64 ? OperandType.FP64 : OperandType.FP32;
+
+ context.Assembler.Fmov(Register(dest, destType), source, topHalf: false);
+ }
+ }
+
+ private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination; // Value
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes());
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Umov(dest, src1, index, dest.Type == OperandType.I64 ? 3 : 2);
+ }
+ else
+ {
+ context.Assembler.DupScalar(dest, src1, index, dest.Type == OperandType.FP64 ? 3 : 2);
+ }
+ }
+
+ private static void GenerateVectorExtract16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination; // Value
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < 8);
+
+ context.Assembler.Umov(dest, src1, index, 1);
+ }
+
+ private static void GenerateVectorExtract8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination; // Value
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < 16);
+
+ context.Assembler.Umov(dest, src1, index, 0);
+ }
+
+ private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Value
+ Operand src3 = operation.GetSource(2); // Index
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ if (src2.Type.IsInteger())
+ {
+ context.Assembler.Ins(dest, src2, index, src2.Type == OperandType.I64 ? 3 : 2);
+ }
+ else
+ {
+ context.Assembler.Ins(dest, src2, 0, index, src2.Type == OperandType.FP64 ? 3 : 2);
+ }
+ }
+
+ private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Value
+ Operand src3 = operation.GetSource(2); // Index
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ context.Assembler.Ins(dest, src2, index, 1);
+ }
+
+ private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Value
+ Operand src3 = operation.GetSource(2); // Index
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ context.Assembler.Ins(dest, src2, index, 0);
+ }
+
+ private static void GenerateVectorOne(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ context.Assembler.CmeqVector(dest, dest, dest, 2);
+ }
+
+ private static void GenerateVectorZero(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ context.Assembler.EorVector(dest, dest, dest);
+ }
+
+ private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
+ context.Assembler.Fmov(Register(dest, OperandType.FP64), Register(source, OperandType.FP64));
+ }
+
+ private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
+ context.Assembler.Fmov(Register(dest, OperandType.FP32), Register(source, OperandType.FP32));
+ }
+
+ private static void GenerateZeroExtend16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Uxth(dest, source);
+ }
+
+ private static void GenerateZeroExtend32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ // We can eliminate the move if source is already 32-bit and the registers are the same.
+ if (dest.Value == source.Value && source.Type == OperandType.I32)
+ {
+ return;
+ }
+
+ context.Assembler.Mov(Register(dest.GetRegister().Index, OperandType.I32), source);
+ }
+
+ private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Uxtb(dest, source);
+ }
+
+ private static UnwindInfo WritePrologue(CodeGenContext context)
+ {
+ List<UnwindPushEntry> pushEntries = new();
+
+ Operand rsp = Register(SpRegister);
+
+ int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+ int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask);
+ int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask);
+
+ int calleeSaveRegionSize = Align16(intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8);
+
+ int offset = 0;
+
+ WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+ WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64);
+
+ int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize);
+ int outArgsSize = context.CallArgsRegionSize;
+
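+ // Frame layout, from higher to lower addresses: callee-saved registers, spill region,
+ // FP/LR save area, then the outgoing call arguments region that SP points at.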
+ if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale))
+ {
+ if (context.HasCall)
+ {
+ context.Assembler.StpRiPre(Register(FpRegister), Register(LrRegister), rsp, -localSize);
+ context.Assembler.MovSp(Register(FpRegister), rsp);
+ }
+
+ if (outArgsSize != 0)
+ {
+ context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, outArgsSize));
+ }
+ }
+ else
+ {
+ int frameSize = localSize + outArgsSize;
+ if (frameSize != 0)
+ {
+ if (CodeGenCommon.ConstFitsOnUImm12(frameSize))
+ {
+ context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, frameSize));
+ }
+ else
+ {
+ Operand tempSize = Register(CodeGenCommon.ReservedRegister);
+ GenerateConstantCopy(context, tempSize, (ulong)frameSize);
+ context.Assembler.Sub(rsp, rsp, tempSize, ArmExtensionType.Uxtx);
+ }
+ }
+
+ context.Assembler.StpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize);
+
+ if (outArgsSize != 0)
+ {
+ context.Assembler.Add(Register(FpRegister), Register(SpRegister), Const(OperandType.I64, outArgsSize));
+ }
+ else
+ {
+ context.Assembler.MovSp(Register(FpRegister), Register(SpRegister));
+ }
+ }
+
+ return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset);
+ }
+
+ private static void WritePrologueCalleeSavesPreIndexed(
+ CodeGenContext context,
+ List<UnwindPushEntry> pushEntries,
+ ref int mask,
+ ref int offset,
+ int calleeSaveRegionSize,
+ OperandType type)
+ {
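+ // If the number of registers to save is odd, store one register on its own first
+ // so that the remaining registers can be stored in pairs with STP.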
+ if ((BitOperations.PopCount((uint)mask) & 1) != 0)
+ {
+ int reg = BitOperations.TrailingZeroCount(mask);
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg));
+
+ mask &= ~(1 << reg);
+
+ if (offset != 0)
+ {
+ context.Assembler.StrRiUn(Register(reg, type), Register(SpRegister), offset);
+ }
+ else
+ {
+ context.Assembler.StrRiPre(Register(reg, type), Register(SpRegister), -calleeSaveRegionSize);
+ }
+
+ offset += type.GetSizeInBytes();
+ }
+
+ while (mask != 0)
+ {
+ int reg = BitOperations.TrailingZeroCount(mask);
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg));
+
+ mask &= ~(1 << reg);
+
+ int reg2 = BitOperations.TrailingZeroCount(mask);
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg2));
+
+ mask &= ~(1 << reg2);
+
+ if (offset != 0)
+ {
+ context.Assembler.StpRiUn(Register(reg, type), Register(reg2, type), Register(SpRegister), offset);
+ }
+ else
+ {
+ context.Assembler.StpRiPre(Register(reg, type), Register(reg2, type), Register(SpRegister), -calleeSaveRegionSize);
+ }
+
+ offset += type.GetSizeInBytes() * 2;
+ }
+ }
+
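+ // Emits the function epilogue, undoing WritePrologue in reverse order.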
+ private static void WriteEpilogue(CodeGenContext context)
+ {
+ Operand rsp = Register(SpRegister);
+
+ int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize);
+ int outArgsSize = context.CallArgsRegionSize;
+
+ if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale))
+ {
+ if (outArgsSize != 0)
+ {
+ context.Assembler.Add(rsp, rsp, Const(OperandType.I64, outArgsSize));
+ }
+
+ if (context.HasCall)
+ {
+ context.Assembler.LdpRiPost(Register(FpRegister), Register(LrRegister), rsp, localSize);
+ }
+ }
+ else
+ {
+ if (context.HasCall)
+ {
+ context.Assembler.LdpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize);
+ }
+
+ int frameSize = localSize + outArgsSize;
+ if (frameSize != 0)
+ {
+ if (CodeGenCommon.ConstFitsOnUImm12(frameSize))
+ {
+ context.Assembler.Add(rsp, rsp, Const(OperandType.I64, frameSize));
+ }
+ else
+ {
+ Operand tempSize = Register(CodeGenCommon.ReservedRegister);
+ GenerateConstantCopy(context, tempSize, (ulong)frameSize);
+ context.Assembler.Add(rsp, rsp, tempSize, ArmExtensionType.Uxtx);
+ }
+ }
+ }
+
+ int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+ int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask);
+ int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask);
+
+ int offset = intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8;
+ int calleeSaveRegionSize = Align16(offset);
+
+ WriteEpilogueCalleeSavesPostIndexed(context, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64);
+ WriteEpilogueCalleeSavesPostIndexed(context, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+ }
+
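+ // Restores callee-saved registers from highest to lowest; the load that
+ // brings offset back to 0 post-indexes SP to free the callee-save region.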
+ private static void WriteEpilogueCalleeSavesPostIndexed(
+ CodeGenContext context,
+ ref int mask,
+ ref int offset,
+ int calleeSaveRegionSize,
+ OperandType type)
+ {
+ while (mask != 0)
+ {
+ int reg = BitUtils.HighestBitSet(mask);
+
+ mask &= ~(1 << reg);
+
+ if (mask != 0)
+ {
+ int reg2 = BitUtils.HighestBitSet(mask);
+
+ mask &= ~(1 << reg2);
+
+ offset -= type.GetSizeInBytes() * 2;
+
+ if (offset != 0)
+ {
+ context.Assembler.LdpRiUn(Register(reg2, type), Register(reg, type), Register(SpRegister), offset);
+ }
+ else
+ {
+ context.Assembler.LdpRiPost(Register(reg2, type), Register(reg, type), Register(SpRegister), calleeSaveRegionSize);
+ }
+ }
+ else
+ {
+ offset -= type.GetSizeInBytes();
+
+ if (offset != 0)
+ {
+ context.Assembler.LdrRiUn(Register(reg, type), Register(SpRegister), offset);
+ }
+ else
+ {
+ context.Assembler.LdrRiPost(Register(reg, type), Register(SpRegister), calleeSaveRegionSize);
+ }
+ }
+ }
+ }
+
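+ // Materializes a constant: zero comes from ZR, logical-immediate encodable
+ // values use a single ORR, and anything else is built from MOVZ/MOVK in
+ // 16-bit chunks (all-zero chunks are skipped).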
+ private static void GenerateConstantCopy(CodeGenContext context, Operand dest, ulong value)
+ {
+ if (value == 0)
+ {
+ context.Assembler.Mov(dest, Register(ZrRegister, dest.Type));
+ }
+ else if (CodeGenCommon.TryEncodeBitMask(dest.Type, value, out _, out _, out _))
+ {
+ context.Assembler.Orr(dest, Register(ZrRegister, dest.Type), Const(dest.Type, (long)value));
+ }
+ else
+ {
+ int hw = 0;
+ bool first = true;
+
+ while (value != 0)
+ {
+ int valueLow = (ushort)value;
+ if (valueLow != 0)
+ {
+ if (first)
+ {
+ context.Assembler.Movz(dest, valueLow, hw);
+ first = false;
+ }
+ else
+ {
+ context.Assembler.Movk(dest, valueLow, hw);
+ }
+ }
+
+ hw++;
+ value >>= 16;
+ }
+ }
+ }
+
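+ // Emits a compare-and-swap loop using exclusive load-acquire/store-release
+ // (LDAXR/STLXR), retrying until the store succeeds or the comparison fails.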
+ private static void GenerateAtomicCas(
+ CodeGenContext context,
+ Operand address,
+ Operand expected,
+ Operand desired,
+ Operand actual,
+ Operand result,
+ AccessSize accessSize)
+ {
+ int startOffset = context.StreamOffset;
+
+ switch (accessSize)
+ {
+ case AccessSize.Byte:
+ context.Assembler.Ldaxrb(actual, address);
+ break;
+ case AccessSize.Hword:
+ context.Assembler.Ldaxrh(actual, address);
+ break;
+ default:
+ context.Assembler.Ldaxr(actual, address);
+ break;
+ }
+
+ context.Assembler.Cmp(actual, expected);
+
+ context.JumpToNear(ArmCondition.Ne);
+
+ switch (accessSize)
+ {
+ case AccessSize.Byte:
+ context.Assembler.Stlxrb(desired, address, result);
+ break;
+ case AccessSize.Hword:
+ context.Assembler.Stlxrh(desired, address, result);
+ break;
+ default:
+ context.Assembler.Stlxr(desired, address, result);
+ break;
+ }
+
+ context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed.
+
+ context.JumpHere();
+
+ context.Assembler.Clrex();
+ }
+
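+ // Emits a 128-bit compare-and-swap loop using LDAXP/STLXP; the two halves
+ // are compared with EOR and the results merged with ORR.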
+ private static void GenerateAtomicDcas(
+ CodeGenContext context,
+ Operand address,
+ Operand expectedLow,
+ Operand expectedHigh,
+ Operand desiredLow,
+ Operand desiredHigh,
+ Operand actualLow,
+ Operand actualHigh,
+ Operand temp0,
+ Operand temp1)
+ {
+ int startOffset = context.StreamOffset;
+
+ context.Assembler.Ldaxp(actualLow, actualHigh, address);
+ context.Assembler.Eor(temp0, actualHigh, expectedHigh);
+ context.Assembler.Eor(temp1, actualLow, expectedLow);
+ context.Assembler.Orr(temp0, temp1, temp0);
+
+ context.JumpToNearIfNotZero(temp0);
+
+ Operand result = Register(temp0, OperandType.I32);
+
+ context.Assembler.Stlxp(desiredLow, desiredHigh, address, result);
+ context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed.
+
+ context.JumpHere();
+
+ context.Assembler.Clrex();
+ }
+
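+ // Attempts to fuse two adjacent loads or stores of the same type, off the
+ // same base register and at consecutive offsets, into a single LDP/STP.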
+ private static bool TryPairMemoryOp(CodeGenContext context, Operation currentOp, Operation nextOp)
+ {
+ if (!TryGetMemOpBaseAndOffset(currentOp, out Operand op1Base, out int op1Offset))
+ {
+ return false;
+ }
+
+ if (!TryGetMemOpBaseAndOffset(nextOp, out Operand op2Base, out int op2Offset))
+ {
+ return false;
+ }
+
+ if (op1Base != op2Base)
+ {
+ return false;
+ }
+
+ OperandType valueType = GetMemOpValueType(currentOp);
+
+ if (valueType != GetMemOpValueType(nextOp) || op1Offset + valueType.GetSizeInBytes() != op2Offset)
+ {
+ return false;
+ }
+
+ if (!CodeGenCommon.ConstFitsOnSImm7(op1Offset, valueType.GetSizeInBytesLog2()))
+ {
+ return false;
+ }
+
+ if (currentOp.Instruction == Instruction.Load)
+ {
+ context.Assembler.LdpRiUn(currentOp.Destination, nextOp.Destination, op1Base, op1Offset);
+ }
+ else if (currentOp.Instruction == Instruction.Store)
+ {
+ context.Assembler.StpRiUn(currentOp.GetSource(1), nextOp.GetSource(1), op1Base, op1Offset);
+ }
+ else
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ private static bool IsLoadOrStore(Operation operation)
+ {
+ return operation.Instruction == Instruction.Load || operation.Instruction == Instruction.Store;
+ }
+
+ private static OperandType GetMemOpValueType(Operation operation)
+ {
+ if (operation.Destination != default)
+ {
+ return operation.Destination.Type;
+ }
+
+ return operation.GetSource(1).Type;
+ }
+
+ private static bool TryGetMemOpBaseAndOffset(Operation operation, out Operand baseAddress, out int offset)
+ {
+ baseAddress = default;
+ offset = 0;
+ Operand address = operation.GetSource(0);
+
+ if (address.Kind != OperandKind.Memory)
+ {
+ return false;
+ }
+
+ MemoryOperand memOp = address.GetMemory();
+ Operand baseOp = memOp.BaseAddress;
+
+ if (baseOp == default)
+ {
+ // No base register; an unscaled index register can serve as the base instead.
+ baseOp = memOp.Index;
+
+ if (baseOp == default || memOp.Scale != Multiplier.x1)
+ {
+ return false;
+ }
+ }
+ else if (memOp.Index != default)
+ {
+ return false;
+ }
+
+ baseAddress = baseOp;
+ offset = memOp.Displacement;
+
+ return true;
+ }
+
+ private static Operand Register(Operand operand, OperandType type = OperandType.I64)
+ {
+ return Register(operand.GetRegister().Index, type);
+ }
+
+ private static Operand Register(int register, OperandType type = OperandType.I64)
+ {
+ return Factory.Register(register, RegisterType.Integer, type);
+ }
+
+ private static int Align16(int value)
+ {
+ return (value + 0xf) & ~0xf;
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateUnOp(Operand dest, Operand source)
+ {
+ // Destination and source aren't forced to be equal.
+ // EnsureSameReg (dest, source);
+ EnsureSameType(dest, source);
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+ {
+ // Destination and source aren't forced to be equal.
+ // EnsureSameReg (dest, src1);
+ EnsureSameType(dest, src1, src2);
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+ {
+ // Destination and source aren't forced to be equal.
+ // EnsureSameReg (dest, src1);
+ EnsureSameType(dest, src1);
+
+ Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
+ }
+
+ private static void EnsureSameReg(Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
+ Debug.Assert(op1.Kind == op2.Kind);
+ Debug.Assert(op1.Value == op2.Value);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ Debug.Assert(op1.Type == op3.Type);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ Debug.Assert(op1.Type == op3.Type);
+ Debug.Assert(op1.Type == op4.Type);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs
new file mode 100644
index 00000000..aaa00bb6
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs
@@ -0,0 +1,662 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class CodeGeneratorIntrinsic
+ {
+ public static void GenerateOperation(CodeGenContext context, Operation operation)
+ {
+ Intrinsic intrin = operation.Intrinsic;
+
+ IntrinsicInfo info = IntrinsicTable.GetInfo(intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+
+ switch (info.Type)
+ {
+ case IntrinsicType.ScalarUnary:
+ GenerateVectorUnary(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.ScalarUnaryByElem:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorUnaryByElem(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(1).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.ScalarBinary:
+ GenerateVectorBinary(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.ScalarBinaryFPByElem:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryFPByElem(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(2).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.ScalarBinaryRd:
+ GenerateVectorUnary(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.ScalarBinaryShl:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShlImm(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.ScalarBinaryShr:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.ScalarFPCompare:
+ GenerateScalarFPCompare(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.ScalarFPConvFixed:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ 0,
+ ((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.ScalarFPConvFixedGpr:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateScalarFPConvGpr(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.ScalarFPConvGpr:
+ GenerateScalarFPConvGpr(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.ScalarTernary:
+ GenerateScalarTernary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2),
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.ScalarTernaryFPRdByElem:
+ Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryFPByElem(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(3).AsInt32(),
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.ScalarTernaryShlRd:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShlImm(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ (uint)operation.GetSource(2).AsInt32());
+ break;
+ case IntrinsicType.ScalarTernaryShrRd:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ (uint)operation.GetSource(2).AsInt32());
+ break;
+
+ case IntrinsicType.VectorUnary:
+ GenerateVectorUnary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.VectorUnaryByElem:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorUnaryByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(1).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.VectorBinary:
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryBitwise:
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryByElem:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(2).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryFPByElem:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryFPByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(2).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryRd:
+ GenerateVectorUnary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryShl:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShlImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.VectorBinaryShr:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.VectorFPConvFixed:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ ((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.VectorInsertByElem:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+ Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+ GenerateVectorInsertByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(3).AsInt32(),
+ (uint)operation.GetSource(1).AsInt32(),
+ operation.Destination,
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorLookupTable:
+ Debug.Assert((uint)(operation.SourcesCount - 2) <= 3);
+
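+ // TBL requires the table registers to be allocated consecutively.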
+ for (int i = 1; i < operation.SourcesCount - 1; i++)
+ {
+ Register currReg = operation.GetSource(i).GetRegister();
+ Register prevReg = operation.GetSource(i - 1).GetRegister();
+
+ Debug.Assert(prevReg.Index + 1 == currReg.Index && currReg.Type == RegisterType.Vector);
+ }
+
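+ // The "len" field (bits 14:13) encodes the number of table registers minus one.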
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ info.Inst | ((uint)(operation.SourcesCount - 2) << 13),
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(operation.SourcesCount - 1));
+ break;
+ case IntrinsicType.VectorTernaryFPRdByElem:
+ Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryFPByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(3).AsInt32(),
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorTernaryRd:
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorTernaryRdBitwise:
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorTernaryRdByElem:
+ Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(3).AsInt32(),
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorTernaryShlRd:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShlImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ (uint)operation.GetSource(2).AsInt32());
+ break;
+ case IntrinsicType.VectorTernaryShrRd:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ (uint)operation.GetSource(2).AsInt32());
+ break;
+
+ case IntrinsicType.GetRegister:
+ context.Assembler.WriteInstruction(info.Inst, operation.Destination);
+ break;
+ case IntrinsicType.SetRegister:
+ context.Assembler.WriteInstruction(info.Inst, operation.GetSource(0));
+ break;
+
+ default:
+ throw new NotImplementedException(info.Type.ToString());
+ }
+ }
+
+ private static void GenerateScalarFPCompare(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ Operand dest,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (sz << 22);
+
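+ // Comparisons against zero use the immediate #0.0 form of the instruction.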
+ if (rm.Kind == OperandKind.Constant && rm.Value == 0)
+ {
+ instruction |= 0b1000;
+ rm = rn;
+ }
+
+ context.Assembler.WriteInstructionRm16NoRet(instruction, rn, rm);
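+ // Read the resulting NZCV flags into the destination register.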
+ context.Assembler.Mrs(dest, 1, 3, 4, 2, 0);
+ }
+
+ private static void GenerateScalarFPConvGpr(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn)
+ {
+ instruction |= (sz << 22);
+
+ if (rd.Type.IsInteger())
+ {
+ context.Assembler.WriteInstructionAuto(instruction, rd, rn);
+ }
+ else
+ {
+ if (rn.Type == OperandType.I64)
+ {
+ instruction |= Assembler.SfFlag;
+ }
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+ }
+
+ private static void GenerateScalarFPConvGpr(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ uint fBits)
+ {
+ Debug.Assert(fBits <= 64);
+
+ instruction |= (sz << 22);
+ instruction |= (64 - fBits) << 10;
+
+ if (rd.Type.IsInteger())
+ {
+ Debug.Assert(rd.Type != OperandType.I32 || fBits <= 32);
+
+ context.Assembler.WriteInstructionAuto(instruction, rd, rn);
+ }
+ else
+ {
+ if (rn.Type == OperandType.I64)
+ {
+ instruction |= Assembler.SfFlag;
+ }
+ else
+ {
+ Debug.Assert(fBits <= 32);
+ }
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+ }
+
+ private static void GenerateScalarTernary(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ Operand ra)
+ {
+ instruction |= (sz << 22);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn, rm, ra);
+ }
+
+ private static void GenerateVectorUnary(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn)
+ {
+ instruction |= (q << 30) | (sz << 22);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+
+ private static void GenerateVectorUnaryByElem(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ uint srcIndex,
+ Operand rd,
+ Operand rn)
+ {
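+ // imm5 encodes the element size (lowest set bit) and the source index above it.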
+ uint imm5 = (srcIndex << ((int)sz + 1)) | (1u << (int)sz);
+
+ instruction |= (q << 30) | (imm5 << 16);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+
+ private static void GenerateVectorBinary(
+ CodeGenContext context,
+ uint q,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (q << 30);
+
+ context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private static void GenerateVectorBinary(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (q << 30) | (sz << 22);
+
+ context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private static void GenerateVectorBinaryByElem(
+ CodeGenContext context,
+ uint q,
+ uint size,
+ uint instruction,
+ uint srcIndex,
+ Operand rd,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (q << 30) | (size << 22);
+
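+ // The element index is split across the H, L and M bits of the encoding.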
+ if (size == 2)
+ {
+ instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
+ }
+ else
+ {
+ instruction |= ((srcIndex & 3) << 20) | ((srcIndex & 4) << 9);
+ }
+
+ context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private static void GenerateVectorBinaryFPByElem(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ uint srcIndex,
+ Operand rd,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (q << 30) | (sz << 22);
+
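+ // Double elements use only the H bit for the index; single elements use H:L.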
+ if (sz != 0)
+ {
+ instruction |= (srcIndex & 1) << 11;
+ }
+ else
+ {
+ instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
+ }
+
+ context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private static void GenerateVectorBinaryShlImm(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ uint shift)
+ {
+ instruction |= (q << 30);
+
+ Debug.Assert(shift < (8u << (int)sz));
+
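+ // immh:immb = element size marker bit ORed with the left shift amount.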
+ uint imm = (8u << (int)sz) | (shift & (0x3fu >> (int)(3 - sz)));
+
+ instruction |= (imm << 16);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+
+ private static void GenerateVectorBinaryShrImm(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ uint shift)
+ {
+ instruction |= (q << 30);
+
+ Debug.Assert(shift > 0 && shift <= (8u << (int)sz));
+
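+ // Right shifts are encoded as (2 * element size in bits) - shift in immh:immb.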
+ uint imm = (8u << (int)sz) | ((8u << (int)sz) - shift);
+
+ instruction |= (imm << 16);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+
+ private static void GenerateVectorInsertByElem(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ uint srcIndex,
+ uint dstIndex,
+ Operand rd,
+ Operand rn)
+ {
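+ // imm5 selects the destination element, imm4 the source element.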
+ uint imm4 = srcIndex << (int)sz;
+ uint imm5 = (dstIndex << ((int)sz + 1)) | (1u << (int)sz);
+
+ instruction |= imm4 << 11;
+ instruction |= imm5 << 16;
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs b/src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
new file mode 100644
index 00000000..99ff299e
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
@@ -0,0 +1,185 @@
+using System;
+using System.Linq;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Versioning;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static partial class HardwareCapabilities
+ {
+ static HardwareCapabilities()
+ {
+ if (!ArmBase.Arm64.IsSupported)
+ {
+ return;
+ }
+
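+ // Linux exposes feature bits through getauxval(AT_HWCAP/AT_HWCAP2);
+ // macOS exposes them through sysctl "hw.optional.*" entries.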
+ if (OperatingSystem.IsLinux())
+ {
+ LinuxFeatureInfoHwCap = (LinuxFeatureFlagsHwCap)getauxval(AT_HWCAP);
+ LinuxFeatureInfoHwCap2 = (LinuxFeatureFlagsHwCap2)getauxval(AT_HWCAP2);
+ }
+
+ if (OperatingSystem.IsMacOS())
+ {
+ for (int i = 0; i < _sysctlNames.Length; i++)
+ {
+ if (CheckSysctlName(_sysctlNames[i]))
+ {
+ MacOsFeatureInfo |= (MacOsFeatureFlags)(1 << i);
+ }
+ }
+ }
+ }
+
+#region Linux
+
+ private const ulong AT_HWCAP = 16;
+ private const ulong AT_HWCAP2 = 26;
+
+ [LibraryImport("libc", SetLastError = true)]
+ private static partial ulong getauxval(ulong type);
+
+ [Flags]
+ public enum LinuxFeatureFlagsHwCap : ulong
+ {
+ Fp = 1 << 0,
+ Asimd = 1 << 1,
+ Evtstrm = 1 << 2,
+ Aes = 1 << 3,
+ Pmull = 1 << 4,
+ Sha1 = 1 << 5,
+ Sha2 = 1 << 6,
+ Crc32 = 1 << 7,
+ Atomics = 1 << 8,
+ FpHp = 1 << 9,
+ AsimdHp = 1 << 10,
+ CpuId = 1 << 11,
+ AsimdRdm = 1 << 12,
+ Jscvt = 1 << 13,
+ Fcma = 1 << 14,
+ Lrcpc = 1 << 15,
+ DcpOp = 1 << 16,
+ Sha3 = 1 << 17,
+ Sm3 = 1 << 18,
+ Sm4 = 1 << 19,
+ AsimdDp = 1 << 20,
+ Sha512 = 1 << 21,
+ Sve = 1 << 22,
+ AsimdFhm = 1 << 23,
+ Dit = 1 << 24,
+ Uscat = 1 << 25,
+ Ilrcpc = 1 << 26,
+ FlagM = 1 << 27,
+ Ssbs = 1 << 28,
+ Sb = 1 << 29,
+ Paca = 1 << 30,
+ Pacg = 1UL << 31
+ }
+
+ [Flags]
+ public enum LinuxFeatureFlagsHwCap2 : ulong
+ {
+ Dcpodp = 1 << 0,
+ Sve2 = 1 << 1,
+ SveAes = 1 << 2,
+ SvePmull = 1 << 3,
+ SveBitperm = 1 << 4,
+ SveSha3 = 1 << 5,
+ SveSm4 = 1 << 6,
+ FlagM2 = 1 << 7,
+ Frint = 1 << 8,
+ SveI8mm = 1 << 9,
+ SveF32mm = 1 << 10,
+ SveF64mm = 1 << 11,
+ SveBf16 = 1 << 12,
+ I8mm = 1 << 13,
+ Bf16 = 1 << 14,
+ Dgh = 1 << 15,
+ Rng = 1 << 16,
+ Bti = 1 << 17,
+ Mte = 1 << 18,
+ Ecv = 1 << 19,
+ Afp = 1 << 20,
+ Rpres = 1 << 21,
+ Mte3 = 1 << 22,
+ Sme = 1 << 23,
+ Sme_i16i64 = 1 << 24,
+ Sme_f64f64 = 1 << 25,
+ Sme_i8i32 = 1 << 26,
+ Sme_f16f32 = 1 << 27,
+ Sme_b16f32 = 1 << 28,
+ Sme_f32f32 = 1 << 29,
+ Sme_fa64 = 1 << 30,
+ Wfxt = 1UL << 31,
+ Ebf16 = 1UL << 32,
+ Sve_Ebf16 = 1UL << 33,
+ Cssc = 1UL << 34,
+ Rprfm = 1UL << 35,
+ Sve2p1 = 1UL << 36
+ }
+
+ public static LinuxFeatureFlagsHwCap LinuxFeatureInfoHwCap { get; } = 0;
+ public static LinuxFeatureFlagsHwCap2 LinuxFeatureInfoHwCap2 { get; } = 0;
+
+#endregion
+
+#region macOS
+
+ [LibraryImport("libSystem.dylib", SetLastError = true)]
+ private static unsafe partial int sysctlbyname([MarshalAs(UnmanagedType.LPStr)] string name, out int oldValue, ref ulong oldSize, IntPtr newValue, ulong newValueSize);
+
+ [SupportedOSPlatform("macos")]
+ private static bool CheckSysctlName(string name)
+ {
+ ulong size = sizeof(int);
+ if (sysctlbyname(name, out int val, ref size, IntPtr.Zero, 0) == 0 && size == sizeof(int))
+ {
+ return val != 0;
+ }
+ return false;
+ }
+
+ private static readonly string[] _sysctlNames = new string[]
+ {
+ "hw.optional.floatingpoint",
+ "hw.optional.AdvSIMD",
+ "hw.optional.arm.FEAT_FP16",
+ "hw.optional.arm.FEAT_AES",
+ "hw.optional.arm.FEAT_PMULL",
+ "hw.optional.arm.FEAT_LSE",
+ "hw.optional.armv8_crc32",
+ "hw.optional.arm.FEAT_SHA1",
+ "hw.optional.arm.FEAT_SHA256"
+ };
+
+ [Flags]
+ public enum MacOsFeatureFlags
+ {
+ Fp = 1 << 0,
+ AdvSimd = 1 << 1,
+ Fp16 = 1 << 2,
+ Aes = 1 << 3,
+ Pmull = 1 << 4,
+ Lse = 1 << 5,
+ Crc32 = 1 << 6,
+ Sha1 = 1 << 7,
+ Sha256 = 1 << 8
+ }
+
+ public static MacOsFeatureFlags MacOsFeatureInfo { get; } = 0;
+
+#endregion
+
+ public static bool SupportsAdvSimd => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Asimd) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.AdvSimd);
+ public static bool SupportsAes => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Aes) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Aes);
+ public static bool SupportsPmull => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Pmull) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Pmull);
+ public static bool SupportsLse => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Atomics) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Lse);
+ public static bool SupportsCrc32 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Crc32) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Crc32);
+ public static bool SupportsSha1 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha1) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha1);
+ public static bool SupportsSha256 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha2) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha256);
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs
new file mode 100644
index 00000000..8695db90
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+ struct IntrinsicInfo
+ {
+ public uint Inst { get; }
+ public IntrinsicType Type { get; }
+
+ public IntrinsicInfo(uint inst, IntrinsicType type)
+ {
+ Inst = inst;
+ Type = type;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
new file mode 100644
index 00000000..a309d56d
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
@@ -0,0 +1,463 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class IntrinsicTable
+ {
+ private static readonly IntrinsicInfo[] _intrinTable;
+
+ static IntrinsicTable()
+ {
+ _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
+
+ Add(Intrinsic.Arm64AbsS, new IntrinsicInfo(0x5e20b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64AbsV, new IntrinsicInfo(0x0e20b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64AddhnV, new IntrinsicInfo(0x0e204000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64AddpS, new IntrinsicInfo(0x5e31b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64AddpV, new IntrinsicInfo(0x0e20bc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64BicVi, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorBinaryBitwiseImm));
+ Add(Intrinsic.Arm64BicV, new IntrinsicInfo(0x0e601c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64BifV, new IntrinsicInfo(0x2ee01c00u, IntrinsicType.VectorTernaryRdBitwise));
+ Add(Intrinsic.Arm64BitV, new IntrinsicInfo(0x2ea01c00u, IntrinsicType.VectorTernaryRdBitwise));
+ Add(Intrinsic.Arm64BslV, new IntrinsicInfo(0x2e601c00u, IntrinsicType.VectorTernaryRdBitwise));
+ Add(Intrinsic.Arm64ClsV, new IntrinsicInfo(0x0e204800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64ClzV, new IntrinsicInfo(0x2e204800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmeqS, new IntrinsicInfo(0x7e208c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmeqV, new IntrinsicInfo(0x2e208c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmeqSz, new IntrinsicInfo(0x5e209800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmeqVz, new IntrinsicInfo(0x0e209800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmgeS, new IntrinsicInfo(0x5e203c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmgeV, new IntrinsicInfo(0x0e203c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmgeSz, new IntrinsicInfo(0x7e208800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmgeVz, new IntrinsicInfo(0x2e208800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmgtS, new IntrinsicInfo(0x5e203400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmgtV, new IntrinsicInfo(0x0e203400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmgtSz, new IntrinsicInfo(0x5e208800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmgtVz, new IntrinsicInfo(0x0e208800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmhiS, new IntrinsicInfo(0x7e203400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmhiV, new IntrinsicInfo(0x2e203400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmhsS, new IntrinsicInfo(0x7e203c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmhsV, new IntrinsicInfo(0x2e203c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmleSz, new IntrinsicInfo(0x7e209800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmleVz, new IntrinsicInfo(0x2e209800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmltSz, new IntrinsicInfo(0x5e20a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmltVz, new IntrinsicInfo(0x0e20a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmtstS, new IntrinsicInfo(0x5e208c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmtstV, new IntrinsicInfo(0x0e208c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CntV, new IntrinsicInfo(0x0e205800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64DupSe, new IntrinsicInfo(0x5e000400u, IntrinsicType.ScalarUnaryByElem));
+ Add(Intrinsic.Arm64DupVe, new IntrinsicInfo(0x0e000400u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64DupGp, new IntrinsicInfo(0x0e000c00u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64EorV, new IntrinsicInfo(0x2e201c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64ExtV, new IntrinsicInfo(0x2e000000u, IntrinsicType.VectorExt));
+ Add(Intrinsic.Arm64FabdS, new IntrinsicInfo(0x7ea0d400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FabdV, new IntrinsicInfo(0x2ea0d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FabsV, new IntrinsicInfo(0x0ea0f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FabsS, new IntrinsicInfo(0x1e20c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FacgeS, new IntrinsicInfo(0x7e20ec00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FacgeV, new IntrinsicInfo(0x2e20ec00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FacgtS, new IntrinsicInfo(0x7ea0ec00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FacgtV, new IntrinsicInfo(0x2ea0ec00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FaddpS, new IntrinsicInfo(0x7e30d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FaddpV, new IntrinsicInfo(0x2e20d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FaddV, new IntrinsicInfo(0x0e20d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FaddS, new IntrinsicInfo(0x1e202800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FccmpeS, new IntrinsicInfo(0x1e200410u, IntrinsicType.ScalarFPCompareCond));
+ Add(Intrinsic.Arm64FccmpS, new IntrinsicInfo(0x1e200400u, IntrinsicType.ScalarFPCompareCond));
+ Add(Intrinsic.Arm64FcmeqS, new IntrinsicInfo(0x5e20e400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FcmeqV, new IntrinsicInfo(0x0e20e400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FcmeqSz, new IntrinsicInfo(0x5ea0d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmeqVz, new IntrinsicInfo(0x0ea0d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmgeS, new IntrinsicInfo(0x7e20e400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FcmgeV, new IntrinsicInfo(0x2e20e400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FcmgeSz, new IntrinsicInfo(0x7ea0c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmgeVz, new IntrinsicInfo(0x2ea0c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmgtS, new IntrinsicInfo(0x7ea0e400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FcmgtV, new IntrinsicInfo(0x2ea0e400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FcmgtSz, new IntrinsicInfo(0x5ea0c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmgtVz, new IntrinsicInfo(0x0ea0c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmleSz, new IntrinsicInfo(0x7ea0d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmleVz, new IntrinsicInfo(0x2ea0d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmltSz, new IntrinsicInfo(0x5ea0e800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmltVz, new IntrinsicInfo(0x0ea0e800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmpeS, new IntrinsicInfo(0x1e202010u, IntrinsicType.ScalarFPCompare));
+ Add(Intrinsic.Arm64FcmpS, new IntrinsicInfo(0x1e202000u, IntrinsicType.ScalarFPCompare));
+ Add(Intrinsic.Arm64FcselS, new IntrinsicInfo(0x1e200c00u, IntrinsicType.ScalarFcsel));
+ Add(Intrinsic.Arm64FcvtasS, new IntrinsicInfo(0x5e21c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtasV, new IntrinsicInfo(0x0e21c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtasGp, new IntrinsicInfo(0x1e240000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtauS, new IntrinsicInfo(0x7e21c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtauV, new IntrinsicInfo(0x2e21c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtauGp, new IntrinsicInfo(0x1e250000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtlV, new IntrinsicInfo(0x0e217800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtmsS, new IntrinsicInfo(0x5e21b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtmsV, new IntrinsicInfo(0x0e21b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtmsGp, new IntrinsicInfo(0x1e300000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtmuS, new IntrinsicInfo(0x7e21b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtmuV, new IntrinsicInfo(0x2e21b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtmuGp, new IntrinsicInfo(0x1e310000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtnsS, new IntrinsicInfo(0x5e21a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtnsV, new IntrinsicInfo(0x0e21a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtnsGp, new IntrinsicInfo(0x1e200000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtnuS, new IntrinsicInfo(0x7e21a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtnuV, new IntrinsicInfo(0x2e21a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtnuGp, new IntrinsicInfo(0x1e210000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtnV, new IntrinsicInfo(0x0e216800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64FcvtpsS, new IntrinsicInfo(0x5ea1a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtpsV, new IntrinsicInfo(0x0ea1a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtpsGp, new IntrinsicInfo(0x1e280000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtpuS, new IntrinsicInfo(0x7ea1a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtpuV, new IntrinsicInfo(0x2ea1a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtpuGp, new IntrinsicInfo(0x1e290000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtxnS, new IntrinsicInfo(0x7e216800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtxnV, new IntrinsicInfo(0x2e216800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtzsSFixed, new IntrinsicInfo(0x5f00fc00u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzsVFixed, new IntrinsicInfo(0x0f00fc00u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzsS, new IntrinsicInfo(0x5ea1b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtzsV, new IntrinsicInfo(0x0ea1b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtzsGpFixed, new IntrinsicInfo(0x1e180000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64FcvtzsGp, new IntrinsicInfo(0x1e380000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtzuSFixed, new IntrinsicInfo(0x7f00fc00u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzuVFixed, new IntrinsicInfo(0x2f00fc00u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzuS, new IntrinsicInfo(0x7ea1b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtzuV, new IntrinsicInfo(0x2ea1b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtzuGpFixed, new IntrinsicInfo(0x1e190000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64FcvtzuGp, new IntrinsicInfo(0x1e390000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtS, new IntrinsicInfo(0x1e224000u, IntrinsicType.ScalarFPConv));
+ Add(Intrinsic.Arm64FdivV, new IntrinsicInfo(0x2e20fc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FdivS, new IntrinsicInfo(0x1e201800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmaddS, new IntrinsicInfo(0x1f000000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FmaxnmpS, new IntrinsicInfo(0x7e30c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FmaxnmpV, new IntrinsicInfo(0x2e20c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxnmvV, new IntrinsicInfo(0x2e30c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FmaxnmV, new IntrinsicInfo(0x0e20c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxnmS, new IntrinsicInfo(0x1e206800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmaxpS, new IntrinsicInfo(0x7e30f800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FmaxpV, new IntrinsicInfo(0x2e20f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxvV, new IntrinsicInfo(0x2e30f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FmaxV, new IntrinsicInfo(0x0e20f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxS, new IntrinsicInfo(0x1e204800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FminnmpS, new IntrinsicInfo(0x7eb0c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FminnmpV, new IntrinsicInfo(0x2ea0c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminnmvV, new IntrinsicInfo(0x2eb0c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FminnmV, new IntrinsicInfo(0x0ea0c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminnmS, new IntrinsicInfo(0x1e207800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FminpS, new IntrinsicInfo(0x7eb0f800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FminpV, new IntrinsicInfo(0x2ea0f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminvV, new IntrinsicInfo(0x2eb0f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FminV, new IntrinsicInfo(0x0ea0f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminS, new IntrinsicInfo(0x1e205800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmlaSe, new IntrinsicInfo(0x5f801000u, IntrinsicType.ScalarTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlaVe, new IntrinsicInfo(0x0f801000u, IntrinsicType.VectorTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlaV, new IntrinsicInfo(0x0e20cc00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64FmlsSe, new IntrinsicInfo(0x5f805000u, IntrinsicType.ScalarTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlsVe, new IntrinsicInfo(0x0f805000u, IntrinsicType.VectorTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlsV, new IntrinsicInfo(0x0ea0cc00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64FmovVi, new IntrinsicInfo(0x0f00f400u, IntrinsicType.VectorFmovi));
+ Add(Intrinsic.Arm64FmovS, new IntrinsicInfo(0x1e204000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FmovGp, new IntrinsicInfo(0x1e260000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FmovSi, new IntrinsicInfo(0x1e201000u, IntrinsicType.ScalarFmovi));
+ Add(Intrinsic.Arm64FmsubS, new IntrinsicInfo(0x1f008000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FmulxSe, new IntrinsicInfo(0x7f809000u, IntrinsicType.ScalarBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulxVe, new IntrinsicInfo(0x2f809000u, IntrinsicType.VectorBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulxS, new IntrinsicInfo(0x5e20dc00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmulxV, new IntrinsicInfo(0x0e20dc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmulSe, new IntrinsicInfo(0x5f809000u, IntrinsicType.ScalarBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulVe, new IntrinsicInfo(0x0f809000u, IntrinsicType.VectorBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulV, new IntrinsicInfo(0x2e20dc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmulS, new IntrinsicInfo(0x1e200800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FnegV, new IntrinsicInfo(0x2ea0f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FnegS, new IntrinsicInfo(0x1e214000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FnmaddS, new IntrinsicInfo(0x1f200000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FnmsubS, new IntrinsicInfo(0x1f208000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FnmulS, new IntrinsicInfo(0x1e208800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FrecpeS, new IntrinsicInfo(0x5ea1d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrecpeV, new IntrinsicInfo(0x0ea1d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrecpsS, new IntrinsicInfo(0x5e20fc00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FrecpsV, new IntrinsicInfo(0x0e20fc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FrecpxS, new IntrinsicInfo(0x5ea1f800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintaV, new IntrinsicInfo(0x2e218800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintaS, new IntrinsicInfo(0x1e264000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintiV, new IntrinsicInfo(0x2ea19800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintiS, new IntrinsicInfo(0x1e27c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintmV, new IntrinsicInfo(0x0e219800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintmS, new IntrinsicInfo(0x1e254000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintnV, new IntrinsicInfo(0x0e218800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintnS, new IntrinsicInfo(0x1e244000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintpV, new IntrinsicInfo(0x0ea18800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintpS, new IntrinsicInfo(0x1e24c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintxV, new IntrinsicInfo(0x2e219800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintxS, new IntrinsicInfo(0x1e274000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintzV, new IntrinsicInfo(0x0ea19800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintzS, new IntrinsicInfo(0x1e25c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrsqrteS, new IntrinsicInfo(0x7ea1d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrsqrteV, new IntrinsicInfo(0x2ea1d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrsqrtsS, new IntrinsicInfo(0x5ea0fc00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FrsqrtsV, new IntrinsicInfo(0x0ea0fc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FsqrtV, new IntrinsicInfo(0x2ea1f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FsqrtS, new IntrinsicInfo(0x1e21c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FsubV, new IntrinsicInfo(0x0ea0d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FsubS, new IntrinsicInfo(0x1e203800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64InsVe, new IntrinsicInfo(0x6e000400u, IntrinsicType.VectorInsertByElem));
+ Add(Intrinsic.Arm64InsGp, new IntrinsicInfo(0x4e001c00u, IntrinsicType.ScalarUnaryByElem));
+ Add(Intrinsic.Arm64Ld1rV, new IntrinsicInfo(0x0d40c000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld1Vms, new IntrinsicInfo(0x0c402000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld1Vss, new IntrinsicInfo(0x0d400000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64Ld2rV, new IntrinsicInfo(0x0d60c000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld2Vms, new IntrinsicInfo(0x0c408000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld2Vss, new IntrinsicInfo(0x0d600000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64Ld3rV, new IntrinsicInfo(0x0d40e000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld3Vms, new IntrinsicInfo(0x0c404000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld3Vss, new IntrinsicInfo(0x0d402000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64Ld4rV, new IntrinsicInfo(0x0d60e000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld4Vms, new IntrinsicInfo(0x0c400000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld4Vss, new IntrinsicInfo(0x0d602000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64MlaVe, new IntrinsicInfo(0x2f000000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64MlaV, new IntrinsicInfo(0x0e209400u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64MlsVe, new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64MlsV, new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64MoviV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorMovi));
+ Add(Intrinsic.Arm64MrsFpcr, new IntrinsicInfo(0xd53b4400u, IntrinsicType.GetRegister));
+ Add(Intrinsic.Arm64MsrFpcr, new IntrinsicInfo(0xd51b4400u, IntrinsicType.SetRegister));
+ Add(Intrinsic.Arm64MrsFpsr, new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister));
+ Add(Intrinsic.Arm64MsrFpsr, new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister));
+ Add(Intrinsic.Arm64MulVe, new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64MulV, new IntrinsicInfo(0x0e209c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64MvniV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorMvni));
+ Add(Intrinsic.Arm64NegS, new IntrinsicInfo(0x7e20b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64NegV, new IntrinsicInfo(0x2e20b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64NotV, new IntrinsicInfo(0x2e205800u, IntrinsicType.VectorUnaryBitwise));
+ Add(Intrinsic.Arm64OrnV, new IntrinsicInfo(0x0ee01c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64OrrVi, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorBinaryBitwiseImm));
+ Add(Intrinsic.Arm64OrrV, new IntrinsicInfo(0x0ea01c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64PmullV, new IntrinsicInfo(0x0e20e000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64PmulV, new IntrinsicInfo(0x2e209c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64RaddhnV, new IntrinsicInfo(0x2e204000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64RbitV, new IntrinsicInfo(0x2e605800u, IntrinsicType.VectorUnaryBitwise));
+ Add(Intrinsic.Arm64Rev16V, new IntrinsicInfo(0x0e201800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64Rev32V, new IntrinsicInfo(0x2e200800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64Rev64V, new IntrinsicInfo(0x0e200800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64RshrnV, new IntrinsicInfo(0x0f008c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64RsubhnV, new IntrinsicInfo(0x2e206000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SabalV, new IntrinsicInfo(0x0e205000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SabaV, new IntrinsicInfo(0x0e207c00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SabdlV, new IntrinsicInfo(0x0e207000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SabdV, new IntrinsicInfo(0x0e207400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SadalpV, new IntrinsicInfo(0x0e206800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64SaddlpV, new IntrinsicInfo(0x0e202800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SaddlvV, new IntrinsicInfo(0x0e303800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SaddlV, new IntrinsicInfo(0x0e200000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SaddwV, new IntrinsicInfo(0x0e201000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64ScvtfSFixed, new IntrinsicInfo(0x5f00e400u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64ScvtfVFixed, new IntrinsicInfo(0x0f00e400u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64ScvtfS, new IntrinsicInfo(0x5e21d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64ScvtfV, new IntrinsicInfo(0x0e21d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64ScvtfGpFixed, new IntrinsicInfo(0x1e020000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64ScvtfGp, new IntrinsicInfo(0x1e220000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64Sha1cV, new IntrinsicInfo(0x5e000000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1hV, new IntrinsicInfo(0x5e280800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64Sha1mV, new IntrinsicInfo(0x5e002000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1pV, new IntrinsicInfo(0x5e001000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1su0V, new IntrinsicInfo(0x5e003000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1su1V, new IntrinsicInfo(0x5e281800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64Sha256h2V, new IntrinsicInfo(0x5e005000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha256hV, new IntrinsicInfo(0x5e004000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha256su0V, new IntrinsicInfo(0x5e282800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64Sha256su1V, new IntrinsicInfo(0x5e006000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64ShaddV, new IntrinsicInfo(0x0e200400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64ShllV, new IntrinsicInfo(0x2e213800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64ShlS, new IntrinsicInfo(0x5f005400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64ShlV, new IntrinsicInfo(0x0f005400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64ShrnV, new IntrinsicInfo(0x0f008400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64ShsubV, new IntrinsicInfo(0x0e202400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SliS, new IntrinsicInfo(0x7f005400u, IntrinsicType.ScalarTernaryShlRd));
+ Add(Intrinsic.Arm64SliV, new IntrinsicInfo(0x2f005400u, IntrinsicType.VectorTernaryShlRd));
+ Add(Intrinsic.Arm64SmaxpV, new IntrinsicInfo(0x0e20a400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SmaxvV, new IntrinsicInfo(0x0e30a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SmaxV, new IntrinsicInfo(0x0e206400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SminpV, new IntrinsicInfo(0x0e20ac00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SminvV, new IntrinsicInfo(0x0e31a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SminV, new IntrinsicInfo(0x0e206c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SmlalVe, new IntrinsicInfo(0x0f002000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64SmlalV, new IntrinsicInfo(0x0e208000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SmlslVe, new IntrinsicInfo(0x0f006000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64SmlslV, new IntrinsicInfo(0x0e20a000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SmovV, new IntrinsicInfo(0x0e002c00u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64SmullVe, new IntrinsicInfo(0x0f00a000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SmullV, new IntrinsicInfo(0x0e20c000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqabsS, new IntrinsicInfo(0x5e207800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64SqabsV, new IntrinsicInfo(0x0e207800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SqaddS, new IntrinsicInfo(0x5e200c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqaddV, new IntrinsicInfo(0x0e200c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmlalSe, new IntrinsicInfo(0x5f003000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlalVe, new IntrinsicInfo(0x0f003000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlalS, new IntrinsicInfo(0x5e209000u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmlalV, new IntrinsicInfo(0x0e209000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmlslSe, new IntrinsicInfo(0x5f007000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlslVe, new IntrinsicInfo(0x0f007000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlslS, new IntrinsicInfo(0x5e20b000u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmlslV, new IntrinsicInfo(0x0e20b000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmulhSe, new IntrinsicInfo(0x5f00c000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmulhVe, new IntrinsicInfo(0x0f00c000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmulhS, new IntrinsicInfo(0x5e20b400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmulhV, new IntrinsicInfo(0x0e20b400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmullSe, new IntrinsicInfo(0x5f00b000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmullVe, new IntrinsicInfo(0x0f00b000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmullS, new IntrinsicInfo(0x5e20d000u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmullV, new IntrinsicInfo(0x0e20d000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqnegS, new IntrinsicInfo(0x7e207800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64SqnegV, new IntrinsicInfo(0x2e207800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SqrdmulhSe, new IntrinsicInfo(0x5f00d000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqrdmulhVe, new IntrinsicInfo(0x0f00d000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqrdmulhS, new IntrinsicInfo(0x7e20b400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqrdmulhV, new IntrinsicInfo(0x2e20b400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqrshlS, new IntrinsicInfo(0x5e205c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqrshlV, new IntrinsicInfo(0x0e205c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqrshrnS, new IntrinsicInfo(0x5f009c00u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqrshrnV, new IntrinsicInfo(0x0f009c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqrshrunS, new IntrinsicInfo(0x7f008c00u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqrshrunV, new IntrinsicInfo(0x2f008c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqshluS, new IntrinsicInfo(0x7f006400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64SqshluV, new IntrinsicInfo(0x2f006400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64SqshlSi, new IntrinsicInfo(0x5f007400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64SqshlVi, new IntrinsicInfo(0x0f007400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64SqshlS, new IntrinsicInfo(0x5e204c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqshlV, new IntrinsicInfo(0x0e204c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqshrnS, new IntrinsicInfo(0x5f009400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqshrnV, new IntrinsicInfo(0x0f009400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqshrunS, new IntrinsicInfo(0x7f008400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqshrunV, new IntrinsicInfo(0x2f008400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqsubS, new IntrinsicInfo(0x5e202c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqsubV, new IntrinsicInfo(0x0e202c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqxtnS, new IntrinsicInfo(0x5e214800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64SqxtnV, new IntrinsicInfo(0x0e214800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64SqxtunS, new IntrinsicInfo(0x7e212800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64SqxtunV, new IntrinsicInfo(0x2e212800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64SrhaddV, new IntrinsicInfo(0x0e201400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SriS, new IntrinsicInfo(0x7f004400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SriV, new IntrinsicInfo(0x2f004400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SrshlS, new IntrinsicInfo(0x5e205400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SrshlV, new IntrinsicInfo(0x0e205400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SrshrS, new IntrinsicInfo(0x5f002400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64SrshrV, new IntrinsicInfo(0x0f002400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64SrsraS, new IntrinsicInfo(0x5f003400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SrsraV, new IntrinsicInfo(0x0f003400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SshllV, new IntrinsicInfo(0x0f00a400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64SshlS, new IntrinsicInfo(0x5e204400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SshlV, new IntrinsicInfo(0x0e204400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SshrS, new IntrinsicInfo(0x5f000400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64SshrV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64SsraS, new IntrinsicInfo(0x5f001400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SsraV, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SsublV, new IntrinsicInfo(0x0e202000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SsubwV, new IntrinsicInfo(0x0e203000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64St1Vms, new IntrinsicInfo(0x0c002000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St1Vss, new IntrinsicInfo(0x0d000000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64St2Vms, new IntrinsicInfo(0x0c008000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St2Vss, new IntrinsicInfo(0x0d200000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64St3Vms, new IntrinsicInfo(0x0c004000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St3Vss, new IntrinsicInfo(0x0d002000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64St4Vms, new IntrinsicInfo(0x0c000000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St4Vss, new IntrinsicInfo(0x0d202000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64SubhnV, new IntrinsicInfo(0x0e206000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SubS, new IntrinsicInfo(0x7e208400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SubV, new IntrinsicInfo(0x2e208400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SuqaddS, new IntrinsicInfo(0x5e203800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64SuqaddV, new IntrinsicInfo(0x0e203800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64TblV, new IntrinsicInfo(0x0e000000u, IntrinsicType.VectorLookupTable));
+ Add(Intrinsic.Arm64TbxV, new IntrinsicInfo(0x0e001000u, IntrinsicType.VectorLookupTable));
+ Add(Intrinsic.Arm64Trn1V, new IntrinsicInfo(0x0e002800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Trn2V, new IntrinsicInfo(0x0e006800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UabalV, new IntrinsicInfo(0x2e205000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UabaV, new IntrinsicInfo(0x2e207c00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UabdlV, new IntrinsicInfo(0x2e207000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UabdV, new IntrinsicInfo(0x2e207400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UadalpV, new IntrinsicInfo(0x2e206800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64UaddlpV, new IntrinsicInfo(0x2e202800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UaddlvV, new IntrinsicInfo(0x2e303800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UaddlV, new IntrinsicInfo(0x2e200000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UaddwV, new IntrinsicInfo(0x2e201000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UcvtfSFixed, new IntrinsicInfo(0x7f00e400u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64UcvtfVFixed, new IntrinsicInfo(0x2f00e400u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64UcvtfS, new IntrinsicInfo(0x7e21d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64UcvtfV, new IntrinsicInfo(0x2e21d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UcvtfGpFixed, new IntrinsicInfo(0x1e030000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64UcvtfGp, new IntrinsicInfo(0x1e230000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64UhaddV, new IntrinsicInfo(0x2e200400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UhsubV, new IntrinsicInfo(0x2e202400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UmaxpV, new IntrinsicInfo(0x2e20a400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UmaxvV, new IntrinsicInfo(0x2e30a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UmaxV, new IntrinsicInfo(0x2e206400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UminpV, new IntrinsicInfo(0x2e20ac00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UminvV, new IntrinsicInfo(0x2e31a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UminV, new IntrinsicInfo(0x2e206c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UmlalVe, new IntrinsicInfo(0x2f002000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64UmlalV, new IntrinsicInfo(0x2e208000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UmlslVe, new IntrinsicInfo(0x2f006000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64UmlslV, new IntrinsicInfo(0x2e20a000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UmovV, new IntrinsicInfo(0x0e003c00u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64UmullVe, new IntrinsicInfo(0x2f00a000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64UmullV, new IntrinsicInfo(0x2e20c000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqaddS, new IntrinsicInfo(0x7e200c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqaddV, new IntrinsicInfo(0x2e200c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqrshlS, new IntrinsicInfo(0x7e205c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqrshlV, new IntrinsicInfo(0x2e205c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqrshrnS, new IntrinsicInfo(0x7f009c00u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UqrshrnV, new IntrinsicInfo(0x2f009c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UqshlSi, new IntrinsicInfo(0x7f007400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64UqshlVi, new IntrinsicInfo(0x2f007400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64UqshlS, new IntrinsicInfo(0x7e204c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqshlV, new IntrinsicInfo(0x2e204c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqshrnS, new IntrinsicInfo(0x7f009400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UqshrnV, new IntrinsicInfo(0x2f009400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UqsubS, new IntrinsicInfo(0x7e202c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqsubV, new IntrinsicInfo(0x2e202c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqxtnS, new IntrinsicInfo(0x7e214800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64UqxtnV, new IntrinsicInfo(0x2e214800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64UrecpeV, new IntrinsicInfo(0x0ea1c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UrhaddV, new IntrinsicInfo(0x2e201400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UrshlS, new IntrinsicInfo(0x7e205400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UrshlV, new IntrinsicInfo(0x2e205400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UrshrS, new IntrinsicInfo(0x7f002400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64UrshrV, new IntrinsicInfo(0x2f002400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64UrsqrteV, new IntrinsicInfo(0x2ea1c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UrsraS, new IntrinsicInfo(0x7f003400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UrsraV, new IntrinsicInfo(0x2f003400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UshllV, new IntrinsicInfo(0x2f00a400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64UshlS, new IntrinsicInfo(0x7e204400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UshlV, new IntrinsicInfo(0x2e204400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UshrS, new IntrinsicInfo(0x7f000400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64UshrV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64UsqaddS, new IntrinsicInfo(0x7e203800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64UsqaddV, new IntrinsicInfo(0x2e203800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64UsraS, new IntrinsicInfo(0x7f001400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UsraV, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UsublV, new IntrinsicInfo(0x2e202000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UsubwV, new IntrinsicInfo(0x2e203000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Uzp1V, new IntrinsicInfo(0x0e001800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Uzp2V, new IntrinsicInfo(0x0e005800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64XtnV, new IntrinsicInfo(0x0e212800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64Zip1V, new IntrinsicInfo(0x0e003800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Zip2V, new IntrinsicInfo(0x0e007800u, IntrinsicType.VectorBinary));
+ }
+
+ private static void Add(Intrinsic intrin, IntrinsicInfo info)
+ {
+ _intrinTable[(int)intrin] = info;
+ }
+
+ public static IntrinsicInfo GetInfo(Intrinsic intrin)
+ {
+ return _intrinTable[(int)intrin];
+ }
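+
+ // Illustrative note, not part of the upstream commit: callers strip the
+ // per-use vector type/size bits before looking up the base encoding, e.g.:
+ //
+ // IntrinsicInfo info = IntrinsicTable.GetInfo(
+ // intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+ //
+ // The code generator is then expected to OR the element size/type fields
+ // back into the base opcode when emitting the instruction.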
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs
new file mode 100644
index 00000000..800eca93
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs
@@ -0,0 +1,59 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+ enum IntrinsicType
+ {
+ ScalarUnary,
+ ScalarUnaryByElem,
+ ScalarBinary,
+ ScalarBinaryByElem,
+ ScalarBinaryFPByElem,
+ ScalarBinaryRd,
+ ScalarBinaryShl,
+ ScalarBinaryShr,
+ ScalarFcsel,
+ ScalarFmovi,
+ ScalarFPCompare,
+ ScalarFPCompareCond,
+ ScalarFPConv,
+ ScalarFPConvFixed,
+ ScalarFPConvFixedGpr,
+ ScalarFPConvGpr,
+ ScalarTernary,
+ ScalarTernaryFPRdByElem,
+ ScalarTernaryShlRd,
+ ScalarTernaryShrRd,
+
+ VectorUnary,
+ VectorUnaryBitwise,
+ VectorUnaryByElem,
+ VectorBinary,
+ VectorBinaryBitwise,
+ VectorBinaryBitwiseImm,
+ VectorBinaryByElem,
+ VectorBinaryFPByElem,
+ VectorBinaryRd,
+ VectorBinaryShl,
+ VectorBinaryShr,
+ VectorExt,
+ VectorFmovi,
+ VectorFPConvFixed,
+ VectorInsertByElem,
+ VectorLdSt,
+ VectorLdStSs,
+ VectorLookupTable,
+ VectorMovi,
+ VectorMvni,
+ VectorTernaryFPRdByElem,
+ VectorTernaryRd,
+ VectorTernaryRdBitwise,
+ VectorTernaryRdByElem,
+ VectorTernaryShlRd,
+ VectorTernaryShrRd,
+
+ Vector128Unary,
+ Vector128Binary,
+
+ GetRegister,
+ SetRegister
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs b/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
new file mode 100644
index 00000000..6ea9d239
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
@@ -0,0 +1,892 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class PreAllocator
+ {
+ private class ConstantDict
+ {
+ private readonly Dictionary<(ulong, OperandType), Operand> _constants;
+
+ public ConstantDict()
+ {
+ _constants = new Dictionary<(ulong, OperandType), Operand>();
+ }
+
+ public void Add(ulong value, OperandType type, Operand local)
+ {
+ _constants.Add((value, type), local);
+ }
+
+ public bool TryGetValue(ulong value, OperandType type, out Operand local)
+ {
+ return _constants.TryGetValue((value, type), out local);
+ }
+ }
+
+ public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
+ {
+ maxCallArgs = -1;
+
+ Span<Operation> buffer = default;
+
+ Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+
+ for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ ConstantDict constants = new ConstantDict();
+
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ if (node.Instruction == Instruction.Phi)
+ {
+ continue;
+ }
+
+ InsertConstantRegCopies(constants, block.Operations, node);
+ InsertDestructiveRegCopies(block.Operations, node);
+
+ switch (node.Instruction)
+ {
+ case Instruction.Call:
+ // Get the maximum number of arguments used on a call.
+ // On Windows, when a struct is returned from the call,
+ // we also need to pass the pointer where the struct
+ // should be written as the first argument.
+ int argsCount = node.SourcesCount - 1;
+
+ if (node.Destination != default && node.Destination.Type == OperandType.V128)
+ {
+ argsCount++;
+ }
+
+ if (maxCallArgs < argsCount)
+ {
+ maxCallArgs = argsCount;
+ }
+
+ // Copy values to registers expected by the function
+ // being called, as mandated by the ABI.
+ InsertCallCopies(constants, block.Operations, node);
+ break;
+ case Instruction.CompareAndSwap:
+ case Instruction.CompareAndSwap16:
+ case Instruction.CompareAndSwap8:
+ nextNode = GenerateCompareAndSwap(block.Operations, node);
+ break;
+ case Instruction.LoadArgument:
+ nextNode = InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node);
+ break;
+ case Instruction.Return:
+ InsertReturnCopy(block.Operations, node);
+ break;
+ case Instruction.Tailcall:
+ InsertTailcallCopies(constants, block.Operations, stackAlloc, node, node);
+ break;
+ }
+ }
+ }
+ }
+
+ private static void InsertConstantRegCopies(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0 || IsIntrinsicWithConst(node))
+ {
+ return;
+ }
+
+ Instruction inst = node.Instruction;
+
+ Operand src1 = node.GetSource(0);
+ Operand src2;
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ if (!src1.Type.IsInteger())
+ {
+ // Handle non-integer types (FP32, FP64 and V128).
+ // For instructions without an immediate operand, we do the following:
+ // - Insert a copy with the constant value (as integer) to a GPR.
+ // - Insert a copy from the GPR to an XMM register.
+ // - Replace the constant use with the XMM register.
+ src1 = AddFloatConstantCopy(constants, nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ else if (!HasConstSrc1(node, src1.Value))
+ {
+ // Handle integer types.
+ // Most ALU instructions accept a 32-bit immediate as the second operand.
+ // We need to ensure the following:
+ // - If the constant is on operand 1, we need to move it.
+ // -- But first, we try to swap operands 1 and 2 if the instruction is commutative.
+ // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+ // - If the constant is on operand 2, we check if the instruction supports it;
+ // if not, we also add a copy. 64-bit constants are usually not supported.
+ if (IsCommutative(node))
+ {
+ src2 = node.GetSource(1);
+
+ Operand temp = src1;
+
+ src1 = src2;
+ src2 = temp;
+
+ node.SetSource(0, src1);
+ node.SetSource(1, src2);
+ }
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ src1 = AddIntConstantCopy(constants, nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ }
+ }
+
+ if (node.SourcesCount < 2)
+ {
+ return;
+ }
+
+ src2 = node.GetSource(1);
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ if (!src2.Type.IsInteger())
+ {
+ src2 = AddFloatConstantCopy(constants, nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ else if (!HasConstSrc2(inst, src2))
+ {
+ src2 = AddIntConstantCopy(constants, nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ }
+
+ if (node.SourcesCount < 3 ||
+ node.Instruction == Instruction.BranchIf ||
+ node.Instruction == Instruction.Compare ||
+ node.Instruction == Instruction.VectorInsert ||
+ node.Instruction == Instruction.VectorInsert16 ||
+ node.Instruction == Instruction.VectorInsert8)
+ {
+ return;
+ }
+
+ for (int srcIndex = 2; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ Operand src = node.GetSource(srcIndex);
+
+ if (src.Kind == OperandKind.Constant)
+ {
+ if (!src.Type.IsInteger())
+ {
+ src = AddFloatConstantCopy(constants, nodes, node, src);
+
+ node.SetSource(srcIndex, src);
+ }
+ else
+ {
+ src = AddIntConstantCopy(constants, nodes, node, src);
+
+ node.SetSource(srcIndex, src);
+ }
+ }
+ }
+ }
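+
+ // Illustrative sketch, not part of the upstream commit: for an operation such
+ // as "d = multiply x, 0x12345678", where Multiply cannot encode the constant,
+ // the pass above rewrites it roughly as:
+ //
+ // temp = copy 0x12345678
+ // d = multiply x, temp
+ //
+ // and the ConstantDict ensures each (value, type) constant is materialized at
+ // most once per basic block.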
+
+ private static void InsertDestructiveRegCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.Destination == default || node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand dest = node.Destination;
+ Operand src1 = node.GetSource(0);
+
+ if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable)
+ {
+ bool useNewLocal = false;
+
+ for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ if (node.GetSource(srcIndex) == dest)
+ {
+ useNewLocal = true;
+
+ break;
+ }
+ }
+
+ if (useNewLocal)
+ {
+ // Dest is being used as some source already, we need to use a new
+ // local to store the temporary value, otherwise the value on dest
+ // local would be overwritten.
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1));
+
+ node.SetSource(0, temp);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
+
+ node.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1));
+
+ node.SetSource(0, dest);
+ }
+ }
+ }
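+
+ // Illustrative sketch, not part of the upstream commit: for a destructive
+ // operation where the destination is also read by a later source, e.g.
+ // "d = vectorinsert d, d, x, 1", the pass above produces:
+ //
+ // temp = copy d
+ // temp = vectorinsert temp, d, x, 1
+ // d = copy temp
+ //
+ // so the original value of d survives until all of its uses are consumed.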
+
+ private static void InsertCallCopies(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operation operation = node;
+
+ Operand dest = operation.Destination;
+
+ List<Operand> sources = new List<Operand>
+ {
+ operation.GetSource(0)
+ };
+
+ int argsCount = operation.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ int stackOffset = 0;
+
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(index + 1);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < intMax;
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct; we pass each half in a GPR when possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ Operand offset = Const(stackOffset);
+
+ Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
+
+ InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, spillOp));
+
+ stackOffset += source.Type.GetSizeInBytes();
+ }
+ }
+
+ if (dest != default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg));
+ nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
+
+ operation.Destination = default;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, dest, retReg);
+
+ nodes.AddAfter(node, copyOp);
+
+ operation.Destination = retReg;
+ }
+ }
+
+ operation.SetSources(sources.ToArray());
+ }
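+
+ // Illustrative sketch, not part of the upstream commit, with register numbers
+ // assuming the usual AArch64 convention (the actual registers come from
+ // CallingConvention): for "v = call fn, x, w" where v and w are V128,
+ // the pass above emits roughly:
+ //
+ // x0 = copy x (integer argument)
+ // x1 = vectorextract w, 0 (V128 argument, low half)
+ // x2 = vectorextract w, 1 (V128 argument, high half)
+ // call fn
+ // v = vectorcreatescalar x0 (V128 return, low half)
+ // v = vectorinsert v, v, x1, 1 (V128 return, high half)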
+
+ private static void InsertTailcallCopies(
+ ConstantDict constants,
+ IntrusiveList<Operation> nodes,
+ StackAllocator stackAlloc,
+ Operation node,
+ Operation operation)
+ {
+ List<Operand> sources = new List<Operand>
+ {
+ operation.GetSource(0)
+ };
+
+ int argsCount = operation.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(1 + index);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < intMax;
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct; we pass each half in a GPR when possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+ }
+ }
+
+ // The target address must be in a return register, since we
+ // don't return anything and it is guaranteed not to be a
+ // callee-saved register (which would be trashed in the epilogue).
+ Operand tcAddress = Gpr(CodeGenCommon.TcAddressRegister, OperandType.I64);
+
+ Operation addrCopyOp = Operation(Instruction.Copy, tcAddress, operation.GetSource(0));
+
+ nodes.AddBefore(node, addrCopyOp);
+
+ sources[0] = tcAddress;
+
+ operation.SetSources(sources.ToArray());
+ }
+
+ private static Operation GenerateCompareAndSwap(IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operand expected = node.GetSource(1);
+
+ if (expected.Type == OperandType.V128)
+ {
+ Operand dest = node.Destination;
+ Operand expectedLow = Local(OperandType.I64);
+ Operand expectedHigh = Local(OperandType.I64);
+ Operand desiredLow = Local(OperandType.I64);
+ Operand desiredHigh = Local(OperandType.I64);
+ Operand actualLow = Local(OperandType.I64);
+ Operand actualHigh = Local(OperandType.I64);
+
+ Operand address = node.GetSource(0);
+ Operand desired = node.GetSource(2);
+
+ void SplitOperand(Operand source, Operand low, Operand high)
+ {
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, low, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, high, source, Const(1)));
+ }
+
+ SplitOperand(expected, expectedLow, expectedHigh);
+ SplitOperand(desired, desiredLow, desiredHigh);
+
+ Operation operation = node;
+
+ // Update the sources and destinations with the split 64-bit halves of the whole 128-bit values.
+ // We also need additional registers that will be used to store temporary information.
+ operation.SetDestinations(new[] { actualLow, actualHigh, Local(OperandType.I64), Local(OperandType.I64) });
+ operation.SetSources(new[] { address, expectedLow, expectedHigh, desiredLow, desiredHigh });
+
+ // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+ // so they can't be used as destination operands.
+ for (int i = 0; i < operation.SourcesCount; i++)
+ {
+ Operand src = operation.GetSource(i);
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+ }
+
+ // Assemble the vector with the 64-bit values at the given memory location.
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, actualLow));
+ node = nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, actualHigh, Const(1)));
+ }
+ else
+ {
+ // We need an additional register where the store result will be written.
+ node.SetDestinations(new[] { node.Destination, Local(OperandType.I32) });
+
+ // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+ // so they can't be used as destination operands.
+ Operation operation = node;
+
+ for (int i = 0; i < operation.SourcesCount; i++)
+ {
+ Operand src = operation.GetSource(i);
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+ }
+ }
+
+ return node.ListNext;
+ }
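+
+ // Illustrative note, not part of the upstream commit: the backend is expected
+ // to lower the rewritten operation to an exclusive load/store loop (e.g.
+ // LDAXR/STLXR, or LDAXP/STLXP for the 128-bit case), which is why the extra
+ // destinations and the dummy copies above are required.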
+
+ private static void InsertReturnCopy(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand source = node.GetSource(0);
+
+ if (source.Type == OperandType.V128)
+ {
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1)));
+ }
+ else
+ {
+ Operand retReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), source.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
+
+ Operation retCopyOp = Operation(Instruction.Copy, retReg, source);
+
+ nodes.AddBefore(node, retCopyOp);
+ }
+ }
+
+ private static Operation InsertLoadArgumentCopy(
+ CompilerContext cctx,
+ ref Span<Operation> buffer,
+ IntrusiveList<Operation> nodes,
+ Operand[] preservedArgs,
+ Operation node)
+ {
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+ int index = source.AsInt32();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ for (int cIndex = 0; cIndex < index; cIndex++)
+ {
+ OperandType argType = cctx.FuncArgTypes[cIndex];
+
+ if (argType.IsInteger())
+ {
+ intCount++;
+ }
+ else if (argType == OperandType.V128)
+ {
+ intCount += 2;
+ }
+ else
+ {
+ vecCount++;
+ }
+ }
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < CallingConvention.GetArgumentsOnRegsCount();
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < CallingConvention.GetArgumentsOnRegsCount();
+ }
+ else
+ {
+ passOnReg = vecCount < CallingConvention.GetArgumentsOnRegsCount();
+ }
+
+ if (passOnReg)
+ {
+ Operand dest = node.Destination;
+
+ if (preservedArgs[index] == default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ // V128 is a struct; we pass each half in a GPR when possible.
+ Operand pArg = Local(OperandType.V128);
+
+ Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
+ Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
+
+ Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg);
+ Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyH);
+ cctx.Cfg.Entry.Operations.AddFirst(copyL);
+
+ preservedArgs[index] = pArg;
+ }
+ else
+ {
+ Operand pArg = Local(dest.Type);
+
+ Operand argReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[index] = pArg;
+ }
+ }
+
+ Operation nextNode;
+
+ if (dest.AssignmentsCount == 1)
+ {
+ // Let's propagate the argument if we can to avoid copies.
+ PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]);
+ nextNode = node.ListNext;
+ }
+ else
+ {
+ Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]);
+ nextNode = nodes.AddBefore(node, argCopyOp);
+ }
+
+ Delete(nodes, node);
+ return nextNode;
+ }
+ else
+ {
+ // TODO: Pass on stack.
+ return node;
+ }
+ }
+
+ private static Operand AddFloatConstantCopy(
+ ConstantDict constants,
+ IntrusiveList<Operation> nodes,
+ Operation node,
+ Operand source)
+ {
+ Operand temp = Local(source.Type);
+
+ Operand intConst = AddIntConstantCopy(constants, nodes, node, GetIntConst(source));
+
+ Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst);
+
+ nodes.AddBefore(node, copyOp);
+
+ return temp;
+ }
+
+ private static Operand AddIntConstantCopy(
+ ConstantDict constants,
+ IntrusiveList<Operation> nodes,
+ Operation node,
+ Operand source)
+ {
+ if (constants.TryGetValue(source.Value, source.Type, out Operand temp))
+ {
+ return temp;
+ }
+
+ temp = Local(source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, temp, source);
+
+ nodes.AddBefore(node, copyOp);
+
+ constants.Add(source.Value, source.Type, temp);
+
+ return temp;
+ }
+
+ private static Operand GetIntConst(Operand value)
+ {
+ if (value.Type == OperandType.FP32)
+ {
+ return Const(value.AsInt32());
+ }
+ else if (value.Type == OperandType.FP64)
+ {
+ return Const(value.AsInt64());
+ }
+
+ return value;
+ }
+
+ private static void Delete(IntrusiveList<Operation> nodes, Operation node)
+ {
+ node.Destination = default;
+
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ node.SetSource(index, default);
+ }
+
+ nodes.Remove(node);
+ }
+
+ private static Operand Gpr(int register, OperandType type)
+ {
+ return Register(register, RegisterType.Integer, type);
+ }
+
+ private static Operand Xmm(int register, OperandType type)
+ {
+ return Register(register, RegisterType.Vector, type);
+ }
+
+ private static bool IsSameOperandDestSrc1(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Extended:
+ return IsSameOperandDestSrc1(operation.Intrinsic);
+ case Instruction.VectorInsert:
+ case Instruction.VectorInsert16:
+ case Instruction.VectorInsert8:
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool IsSameOperandDestSrc1(Intrinsic intrinsic)
+ {
+ IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+
+ return info.Type == IntrinsicType.ScalarBinaryRd ||
+ info.Type == IntrinsicType.ScalarTernaryFPRdByElem ||
+ info.Type == IntrinsicType.ScalarTernaryShlRd ||
+ info.Type == IntrinsicType.ScalarTernaryShrRd ||
+ info.Type == IntrinsicType.VectorBinaryRd ||
+ info.Type == IntrinsicType.VectorInsertByElem ||
+ info.Type == IntrinsicType.VectorTernaryRd ||
+ info.Type == IntrinsicType.VectorTernaryRdBitwise ||
+ info.Type == IntrinsicType.VectorTernaryFPRdByElem ||
+ info.Type == IntrinsicType.VectorTernaryRdByElem ||
+ info.Type == IntrinsicType.VectorTernaryShlRd ||
+ info.Type == IntrinsicType.VectorTernaryShrRd;
+ }
+
+ private static bool HasConstSrc1(Operation node, ulong value)
+ {
+ switch (node.Instruction)
+ {
+ case Instruction.Add:
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ case Instruction.Subtract:
+ // The immediate encoding of those instructions does not allow Rn to be
+ // XZR (it will be SP instead), so we can't allow a constant Rn in this case.
+ return value == 0 && NotConstOrConst0(node.GetSource(1));
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseNot:
+ case Instruction.BitwiseOr:
+ case Instruction.ByteSwap:
+ case Instruction.CountLeadingZeros:
+ case Instruction.Multiply:
+ case Instruction.Negate:
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ return value == 0;
+ case Instruction.Copy:
+ case Instruction.LoadArgument:
+ case Instruction.Spill:
+ case Instruction.SpillArg:
+ return true;
+ case Instruction.Extended:
+ return value == 0;
+ }
+
+ return false;
+ }
+
+ private static bool NotConstOrConst0(Operand operand)
+ {
+ return operand.Kind != OperandKind.Constant || operand.Value == 0;
+ }
+
+ private static bool HasConstSrc2(Instruction inst, Operand operand)
+ {
+ ulong value = operand.Value;
+
+ switch (inst)
+ {
+ case Instruction.Add:
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ case Instruction.Subtract:
+ return ConstFitsOnUImm12Sh(value);
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseOr:
+ return value == 0 || CodeGenCommon.TryEncodeBitMask(operand, out _, out _, out _);
+ case Instruction.Multiply:
+ case Instruction.Store:
+ case Instruction.Store16:
+ case Instruction.Store8:
+ return value == 0;
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ case Instruction.VectorExtract:
+ case Instruction.VectorExtract16:
+ case Instruction.VectorExtract8:
+ return true;
+ case Instruction.Extended:
+ // TODO: Check if actual intrinsic is supposed to have consts here?
+ // Right now we only hit this case for fixed-point int <-> FP conversion instructions.
+ return true;
+ }
+
+ return false;
+ }
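+
+ // Illustrative note, not part of the upstream commit: the bitwise cases above
+ // depend on the AArch64 logical immediate encoding, which only accepts
+ // replicated patterns of rotated runs of ones. For example,
+ // 0x00FF00FF00FF00FF is encodable (8 ones replicated per 16-bit element),
+ // while 0x0000000012345678 is not and is materialized into a register instead.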
+
+ private static bool IsCommutative(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseOr:
+ case Instruction.Multiply:
+ return true;
+
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ {
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var compType = (Comparison)comp.AsInt32();
+
+ return compType == Comparison.Equal || compType == Comparison.NotEqual;
+ }
+ }
+
+ return false;
+ }
+
+ private static bool ConstFitsOnUImm12Sh(ulong value)
+ {
+ return (value & ~0xfffUL) == 0 || (value & ~0xfff000UL) == 0;
+ }
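+
+ // Illustrative note, not part of the upstream commit: this matches the AArch64
+ // ADD/SUB (immediate) encoding, a 12-bit unsigned immediate optionally shifted
+ // left by 12 bits. For example:
+ //
+ // ConstFitsOnUImm12Sh(0xfff) == true (fits unshifted)
+ // ConstFitsOnUImm12Sh(0xabc000) == true (fits with LSL #12)
+ // ConstFitsOnUImm12Sh(0x1001) == false (needs bits from both ranges)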
+
+ private static bool IsIntrinsicWithConst(Operation operation)
+ {
+ bool isIntrinsic = IsIntrinsic(operation.Instruction);
+
+ if (isIntrinsic)
+ {
+ Intrinsic intrinsic = operation.Intrinsic;
+ IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+
+ // Those have integer inputs that don't support consts.
+ return info.Type != IntrinsicType.ScalarFPConvGpr &&
+ info.Type != IntrinsicType.ScalarFPConvFixedGpr &&
+ info.Type != IntrinsicType.SetRegister;
+ }
+
+ return false;
+ }
+
+ private static bool IsIntrinsic(Instruction inst)
+ {
+ return inst == Instruction.Extended;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/CompiledFunction.cs b/src/ARMeilleure/CodeGen/CompiledFunction.cs
new file mode 100644
index 00000000..0560bf2e
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/CompiledFunction.cs
@@ -0,0 +1,68 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Translation.Cache;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.CodeGen
+{
+ /// <summary>
+ /// Represents a compiled function.
+ /// </summary>
+ readonly struct CompiledFunction
+ {
+ /// <summary>
+ /// Gets the machine code of the <see cref="CompiledFunction"/>.
+ /// </summary>
+ public byte[] Code { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Unwinding.UnwindInfo"/> of the <see cref="CompiledFunction"/>.
+ /// </summary>
+ public UnwindInfo UnwindInfo { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Linking.RelocInfo"/> of the <see cref="CompiledFunction"/>.
+ /// </summary>
+ public RelocInfo RelocInfo { get; }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="CompiledFunction"/> struct with the specified machine code,
+ /// unwind info and relocation info.
+ /// </summary>
+ /// <param name="code">Machine code</param>
+ /// <param name="unwindInfo">Unwind info</param>
+ /// <param name="relocInfo">Relocation info</param>
+ internal CompiledFunction(byte[] code, UnwindInfo unwindInfo, RelocInfo relocInfo)
+ {
+ Code = code;
+ UnwindInfo = unwindInfo;
+ RelocInfo = relocInfo;
+ }
+
+ /// <summary>
+ /// Maps the <see cref="CompiledFunction"/> onto the <see cref="JitCache"/> and returns a delegate of type
+ /// <typeparamref name="T"/> pointing to the mapped function.
+ /// </summary>
+ /// <typeparam name="T">Type of delegate</typeparam>
+ /// <returns>A delegate of type <typeparamref name="T"/> pointing to the mapped function</returns>
+ public T Map<T>()
+ {
+ return MapWithPointer<T>(out _);
+ }
+
+ /// <summary>
+ /// Maps the <see cref="CompiledFunction"/> onto the <see cref="JitCache"/> and returns a delegate of type
+ /// <typeparamref name="T"/> pointing to the mapped function.
+ /// </summary>
+ /// <typeparam name="T">Type of delegate</typeparam>
+ /// <param name="codePointer">Pointer to the function code in memory</param>
+ /// <returns>A delegate of type <typeparamref name="T"/> pointing to the mapped function</returns>
+ public T MapWithPointer<T>(out IntPtr codePointer)
+ {
+ codePointer = JitCache.Map(this);
+
+ return Marshal.GetDelegateForFunctionPointer<T>(codePointer);
+ }
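+
+ // Illustrative usage, not part of the upstream commit; GuestFunction stands in
+ // for a hypothetical delegate type matching the generated code's signature:
+ //
+ // GuestFunction func = compiledFunction.Map<GuestFunction>();
+ // ulong result = func(nativeContextPointer);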
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs b/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs
new file mode 100644
index 00000000..a27bfded
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs
@@ -0,0 +1,38 @@
+namespace ARMeilleure.CodeGen.Linking
+{
+ /// <summary>
+ /// Represents a relocation.
+ /// </summary>
+ readonly struct RelocEntry
+ {
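+ // Presumably 4 bytes of position, 1 byte of symbol type and 8 bytes of
+ // symbol value when serialized (an assumption; the serializer lives elsewhere).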
+ public const int Stride = 13; // Bytes.
+
+ /// <summary>
+ /// Gets the position of the relocation.
+ /// </summary>
+ public int Position { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Symbol"/> of the relocation.
+ /// </summary>
+ public Symbol Symbol { get; }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="RelocEntry"/> struct with the specified position and
+ /// <see cref="Symbol"/>.
+ /// </summary>
+ /// <param name="position">Position of relocation</param>
+ /// <param name="symbol">Symbol of relocation</param>
+ public RelocEntry(int position, Symbol symbol)
+ {
+ Position = position;
+ Symbol = symbol;
+ }
+
+ /// <inheritdoc/>
+ public override string ToString()
+ {
+ return $"({nameof(Position)} = {Position}, {nameof(Symbol)} = {Symbol})";
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs b/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs
new file mode 100644
index 00000000..caaf08e3
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs
@@ -0,0 +1,32 @@
+using System;
+
+namespace ARMeilleure.CodeGen.Linking
+{
+ /// <summary>
+ /// Represents relocation information about a <see cref="CompiledFunction"/>.
+ /// </summary>
+ readonly struct RelocInfo
+ {
+ /// <summary>
+ /// Gets an empty <see cref="RelocInfo"/>.
+ /// </summary>
+ public static RelocInfo Empty { get; } = new RelocInfo(null);
+
+ private readonly RelocEntry[] _entries;
+
+ /// <summary>
+ /// Gets the set of <see cref="RelocEntry"/>.
+ /// </summary>
+ public ReadOnlySpan<RelocEntry> Entries => _entries;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="RelocInfo"/> struct with the specified set of
+ /// <see cref="RelocEntry"/>.
+ /// </summary>
+ /// <param name="entries">Set of <see cref="RelocInfo"/> to use</param>
+ public RelocInfo(RelocEntry[] entries)
+ {
+ _entries = entries;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Linking/Symbol.cs b/src/ARMeilleure/CodeGen/Linking/Symbol.cs
new file mode 100644
index 00000000..39e0c3eb
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Linking/Symbol.cs
@@ -0,0 +1,99 @@
+using System;
+
+namespace ARMeilleure.CodeGen.Linking
+{
+ /// <summary>
+ /// Represents a symbol.
+ /// </summary>
+ readonly struct Symbol
+ {
+ private readonly ulong _value;
+
+ /// <summary>
+ /// Gets the <see cref="SymbolType"/> of the <see cref="Symbol"/>.
+ /// </summary>
+ public SymbolType Type { get; }
+
+ /// <summary>
+ /// Gets the value of the <see cref="Symbol"/>.
+ /// </summary>
+ /// <exception cref="InvalidOperationException"><see cref="Type"/> is <see cref="SymbolType.None"/></exception>
+ public ulong Value
+ {
+ get
+ {
+ if (Type == SymbolType.None)
+ {
+ ThrowSymbolNone();
+ }
+
+ return _value;
+ }
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Symbol"/> structure with the specified <see cref="SymbolType"/> and value.
+ /// </summary>
+ /// <param name="type">Type of symbol</param>
+ /// <param name="value">Value of symbol</param>
+ public Symbol(SymbolType type, ulong value)
+ {
+ (Type, _value) = (type, value);
+ }
+
+ /// <summary>
+ /// Determines if the specified <see cref="Symbol"/> instances are equal.
+ /// </summary>
+ /// <param name="a">First instance</param>
+ /// <param name="b">Second instance</param>
+ /// <returns><see langword="true"/> if equal; otherwise <see langword="false"/></returns>
+ public static bool operator ==(Symbol a, Symbol b)
+ {
+ return a.Equals(b);
+ }
+
+ /// <summary>
+ /// Determines if the specified <see cref="Symbol"/> instances are not equal.
+ /// </summary>
+ /// <param name="a">First instance</param>
+ /// <param name="b">Second instance</param>
+ /// <returns><see langword="true"/> if not equal; otherwise <see langword="false"/></returns>
+ public static bool operator !=(Symbol a, Symbol b)
+ {
+ return !(a == b);
+ }
+
+ /// <summary>
+ /// Determines if the specified <see cref="Symbol"/> is equal to this <see cref="Symbol"/> instance.
+ /// </summary>
+ /// <param name="other">Other <see cref="Symbol"/> instance</param>
+ /// <returns><see langword="true"/> if equal; otherwise <see langword="false"/></returns>
+ public bool Equals(Symbol other)
+ {
+ return other.Type == Type && other._value == _value;
+ }
+
+ /// <inheritdoc/>
+ public override bool Equals(object obj)
+ {
+ return obj is Symbol sym && Equals(sym);
+ }
+
+ /// <inheritdoc/>
+ public override int GetHashCode()
+ {
+ return HashCode.Combine(Type, _value);
+ }
+
+ /// <inheritdoc/>
+ public override string ToString()
+ {
+ return $"{Type}:{_value}";
+ }
+
+ private static void ThrowSymbolNone()
+ {
+ throw new InvalidOperationException("Symbol refers to nothing.");
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Linking/SymbolType.cs b/src/ARMeilleure/CodeGen/Linking/SymbolType.cs
new file mode 100644
index 00000000..b05b6969
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Linking/SymbolType.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.CodeGen.Linking
+{
+ /// <summary>
+ /// Types of <see cref="Symbol"/>.
+ /// </summary>
+ enum SymbolType : byte
+ {
+ /// <summary>
+ /// Refers to nothing, i.e. no symbol.
+ /// </summary>
+ None,
+
+ /// <summary>
+ /// Refers to an entry in <see cref="Translation.Delegates"/>.
+ /// </summary>
+ DelegateTable,
+
+ /// <summary>
+ /// Refers to an entry in <see cref="Translation.Translator.FunctionTable"/>.
+ /// </summary>
+ FunctionTable,
+
+ /// <summary>
+ /// Refers to a special symbol which is handled by <see cref="Translation.PTC.Ptc.PatchCode"/>.
+ /// </summary>
+ Special
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs b/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs
new file mode 100644
index 00000000..9e243d37
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs
@@ -0,0 +1,72 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class BlockPlacement
+ {
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+ bool update = false;
+
+ BasicBlock block;
+ BasicBlock nextBlock;
+
+ BasicBlock lastBlock = cfg.Blocks.Last;
+
+ // Move cold blocks to the end of the list, so that they are emitted away from hot code.
+ for (block = cfg.Blocks.First; block != null; block = nextBlock)
+ {
+ nextBlock = block.ListNext;
+
+ if (block.Frequency == BasicBlockFrequency.Cold)
+ {
+ cfg.Blocks.Remove(block);
+ cfg.Blocks.AddLast(block);
+ }
+
+ if (block == lastBlock)
+ {
+ break;
+ }
+ }
+
+ for (block = cfg.Blocks.First; block != null; block = nextBlock)
+ {
+ nextBlock = block.ListNext;
+
+ if (block.SuccessorsCount == 2)
+ {
+ Operation branchOp = block.Operations.Last;
+
+ Debug.Assert(branchOp.Instruction == Instruction.BranchIf);
+
+ BasicBlock falseSucc = block.GetSuccessor(0);
+ BasicBlock trueSucc = block.GetSuccessor(1);
+
+ // If the true successor is the next block in the list, invert the condition. We avoid
+ // extra branching by making the true side the fall-through (i.e., converting it to the false side).
+ if (trueSucc == block.ListNext)
+ {
+ Comparison comp = (Comparison)branchOp.GetSource(2).AsInt32();
+ Comparison compInv = comp.Invert();
+
+ branchOp.SetSource(2, Const((int)compInv));
+
+ block.SetSuccessor(0, trueSucc);
+ block.SetSuccessor(1, falseSucc);
+
+ update = true;
+ }
+ }
+ }
+
+ if (update)
+ {
+ cfg.Update();
+ }
+ }
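+
+ // Illustrative sketch, not part of the upstream commit: a block ending in
+ // "branchif x, y, Greater" whose true successor is the next block in the list
+ // becomes "branchif x, y, LessOrEqual" with its successors swapped, so the
+ // former true side falls through without a taken branch.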
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
new file mode 100644
index 00000000..c5a22a53
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
@@ -0,0 +1,346 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class ConstantFolding
+ {
+ public static void RunPass(Operation operation)
+ {
+ if (operation.Destination == default || operation.SourcesCount == 0)
+ {
+ return;
+ }
+
+ if (!AreAllSourcesConstant(operation))
+ {
+ return;
+ }
+
+ OperandType type = operation.Destination.Type;
+
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ if (operation.GetSource(0).Relocatable ||
+ operation.GetSource(1).Relocatable)
+ {
+ break;
+ }
+
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x + y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x + y);
+ }
+ break;
+
+ case Instruction.BitwiseAnd:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x & y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x & y);
+ }
+ break;
+
+ case Instruction.BitwiseExclusiveOr:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x ^ y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x ^ y);
+ }
+ break;
+
+ case Instruction.BitwiseNot:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => ~x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => ~x);
+ }
+ break;
+
+ case Instruction.BitwiseOr:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x | y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x | y);
+ }
+ break;
+
+ case Instruction.ConvertI64ToI32:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => x);
+ }
+ break;
+
+ case Instruction.Compare:
+ if (type == OperandType.I32 &&
+ operation.GetSource(0).Type == type &&
+ operation.GetSource(1).Type == type)
+ {
+ switch ((Comparison)operation.GetSource(2).Value)
+ {
+ case Comparison.Equal:
+ EvaluateBinaryI32(operation, (x, y) => x == y ? 1 : 0);
+ break;
+ case Comparison.NotEqual:
+ EvaluateBinaryI32(operation, (x, y) => x != y ? 1 : 0);
+ break;
+ case Comparison.Greater:
+ EvaluateBinaryI32(operation, (x, y) => x > y ? 1 : 0);
+ break;
+ case Comparison.LessOrEqual:
+ EvaluateBinaryI32(operation, (x, y) => x <= y ? 1 : 0);
+ break;
+ case Comparison.GreaterUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x > (uint)y ? 1 : 0);
+ break;
+ case Comparison.LessOrEqualUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x <= (uint)y ? 1 : 0);
+ break;
+ case Comparison.GreaterOrEqual:
+ EvaluateBinaryI32(operation, (x, y) => x >= y ? 1 : 0);
+ break;
+ case Comparison.Less:
+ EvaluateBinaryI32(operation, (x, y) => x < y ? 1 : 0);
+ break;
+ case Comparison.GreaterOrEqualUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x >= (uint)y ? 1 : 0);
+ break;
+ case Comparison.LessUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x < (uint)y ? 1 : 0);
+ break;
+ }
+ }
+ break;
+
+ case Instruction.Copy:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => x);
+ }
+ break;
+
+ case Instruction.Divide:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => y != 0 ? x / y : 0);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => y != 0 ? x / y : 0);
+ }
+ break;
+
+ case Instruction.DivideUI:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => y != 0 ? (int)((uint)x / (uint)y) : 0);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => y != 0 ? (long)((ulong)x / (ulong)y) : 0);
+ }
+ break;
+
+ case Instruction.Multiply:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x * y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x * y);
+ }
+ break;
+
+ case Instruction.Negate:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => -x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => -x);
+ }
+ break;
+
+ case Instruction.ShiftLeft:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x << y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x << (int)y);
+ }
+ break;
+
+ case Instruction.ShiftRightSI:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x >> y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x >> (int)y);
+ }
+ break;
+
+ case Instruction.ShiftRightUI:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => (int)((uint)x >> y));
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => (long)((ulong)x >> (int)y));
+ }
+ break;
+
+ case Instruction.SignExtend16:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => (short)x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (short)x);
+ }
+ break;
+
+ case Instruction.SignExtend32:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (int)x);
+ }
+ break;
+
+ case Instruction.SignExtend8:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => (sbyte)x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (sbyte)x);
+ }
+ break;
+
+ case Instruction.ZeroExtend16:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => (ushort)x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (ushort)x);
+ }
+ break;
+
+ case Instruction.ZeroExtend32:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (uint)x);
+ }
+ break;
+
+ case Instruction.ZeroExtend8:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => (byte)x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (byte)x);
+ }
+ break;
+
+ case Instruction.Subtract:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x - y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x - y);
+ }
+ break;
+ }
+ }
+
+ private static bool AreAllSourcesConstant(Operation operation)
+ {
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ Operand srcOp = operation.GetSource(index);
+
+ if (srcOp.Kind != OperandKind.Constant)
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ private static void EvaluateUnaryI32(Operation operation, Func<int, int> op)
+ {
+ int x = operation.GetSource(0).AsInt32();
+
+ operation.TurnIntoCopy(Const(op(x)));
+ }
+
+ private static void EvaluateUnaryI64(Operation operation, Func<long, long> op)
+ {
+ long x = operation.GetSource(0).AsInt64();
+
+ operation.TurnIntoCopy(Const(op(x)));
+ }
+
+ private static void EvaluateBinaryI32(Operation operation, Func<int, int, int> op)
+ {
+ int x = operation.GetSource(0).AsInt32();
+ int y = operation.GetSource(1).AsInt32();
+
+ operation.TurnIntoCopy(Const(op(x, y)));
+ }
+
+ private static void EvaluateBinaryI64(Operation operation, Func<long, long, long> op)
+ {
+ long x = operation.GetSource(0).AsInt64();
+ long y = operation.GetSource(1).AsInt64();
+
+ operation.TurnIntoCopy(Const(op(x, y)));
+ }
+ }
+} \ No newline at end of file
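
For illustration, a minimal standalone sketch (not part of the patch, and not the ARMeilleure API) of the evaluate-and-replace idea used by the helpers above: the result is computed once from the constant sources, and the operation is then turned into a copy of the resulting constant. The extension cases lean entirely on C# cast semantics.

using System;

static class ConstantFoldingSketch
{
    // Mirrors EvaluateBinaryI32: compute the result once; the real pass then
    // replaces the operation with a copy of the resulting constant.
    static int FoldBinaryI32(Func<int, int, int> op, int x, int y) => op(x, y);

    static void Main()
    {
        // %d = Add 2, 3  =>  %d = Copy 5
        Console.WriteLine(FoldBinaryI32((a, b) => a + b, 2, 3)); // 5

        // Division folds guard against a constant zero divisor, yielding 0.
        Console.WriteLine(FoldBinaryI32((a, b) => b != 0 ? a / b : 0, 7, 0)); // 0

        // Casting to a narrower signed type and widening back sign-extends, while
        // an unsigned type zero-extends, matching the SignExtend16/ZeroExtend16 cases.
        long v = 0xFF80;
        Console.WriteLine((long)(short)v);  // -128
        Console.WriteLine((long)(ushort)v); // 65408
    }
}
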
diff --git a/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
new file mode 100644
index 00000000..a45bb455
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
@@ -0,0 +1,252 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class Optimizer
+ {
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+ // Scratch buffer used to store uses.
+ Span<Operation> buffer = default;
+
+ bool modified;
+
+ do
+ {
+ modified = false;
+
+ for (BasicBlock block = cfg.Blocks.Last; block != null; block = block.ListPrevious)
+ {
+ Operation node;
+ Operation prevNode;
+
+ for (node = block.Operations.Last; node != default; node = prevNode)
+ {
+ prevNode = node.ListPrevious;
+
+ if (IsUnused(node))
+ {
+ RemoveNode(block, node);
+
+ modified = true;
+
+ continue;
+ }
+ else if (node.Instruction == Instruction.Phi)
+ {
+ continue;
+ }
+
+ ConstantFolding.RunPass(node);
+ Simplification.RunPass(node);
+
+ if (DestIsSingleLocalVar(node))
+ {
+ if (IsPropagableCompare(node))
+ {
+ modified |= PropagateCompare(ref buffer, node);
+
+ if (modified && IsUnused(node))
+ {
+ RemoveNode(block, node);
+ }
+ }
+ else if (IsPropagableCopy(node))
+ {
+ PropagateCopy(ref buffer, node);
+
+ RemoveNode(block, node);
+
+ modified = true;
+ }
+ }
+ }
+ }
+ }
+ while (modified);
+ }
+
+ public static void RemoveUnusedNodes(ControlFlowGraph cfg)
+ {
+ bool modified;
+
+ do
+ {
+ modified = false;
+
+ for (BasicBlock block = cfg.Blocks.Last; block != null; block = block.ListPrevious)
+ {
+ Operation node;
+ Operation prevNode;
+
+ for (node = block.Operations.Last; node != default; node = prevNode)
+ {
+ prevNode = node.ListPrevious;
+
+ if (IsUnused(node))
+ {
+ RemoveNode(block, node);
+
+ modified = true;
+ }
+ }
+ }
+ }
+ while (modified);
+ }
+
+ private static bool PropagateCompare(ref Span<Operation> buffer, Operation compOp)
+ {
+ // Try to propagate Compare operations into their BranchIf uses, when these BranchIf uses are in the form
+ // of:
+ //
+ // - BranchIf %x, 0x0, Equal ;; i.e. BranchIfFalse %x
+ // - BranchIf %x, 0x0, NotEqual ;; i.e. BranchIfTrue %x
+ //
+ // The commutative property of Equal and NotEqual is taken into consideration as well.
+ //
+ // For example:
+ //
+ // %x = Compare %a, %b, comp
+ // BranchIf %x, 0x0, NotEqual
+ //
+ // =>
+ //
+ // BranchIf %a, %b, comp
+
+ static bool IsZeroBranch(Operation operation, out Comparison compType)
+ {
+ compType = Comparison.Equal;
+
+ if (operation.Instruction != Instruction.BranchIf)
+ {
+ return false;
+ }
+
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand comp = operation.GetSource(2);
+
+ compType = (Comparison)comp.AsInt32();
+
+ return (src1.Kind == OperandKind.Constant && src1.Value == 0) ||
+ (src2.Kind == OperandKind.Constant && src2.Value == 0);
+ }
+
+ bool modified = false;
+
+ Operand dest = compOp.Destination;
+ Operand src1 = compOp.GetSource(0);
+ Operand src2 = compOp.GetSource(1);
+ Operand comp = compOp.GetSource(2);
+
+ Comparison compType = (Comparison)comp.AsInt32();
+
+ Span<Operation> uses = dest.GetUses(ref buffer);
+
+ foreach (Operation use in uses)
+ {
+ // If the operation is a BranchIf with a constant 0 in either its LHS or RHS source operand.
+ if (IsZeroBranch(use, out Comparison otherCompType))
+ {
+ Comparison propCompType;
+
+ if (otherCompType == Comparison.NotEqual)
+ {
+ propCompType = compType;
+ }
+ else if (otherCompType == Comparison.Equal)
+ {
+ propCompType = compType.Invert();
+ }
+ else
+ {
+ continue;
+ }
+
+ use.SetSource(0, src1);
+ use.SetSource(1, src2);
+ use.SetSource(2, Const((int)propCompType));
+
+ modified = true;
+ }
+ }
+
+ return modified;
+ }
+
+ private static void PropagateCopy(ref Span<Operation> buffer, Operation copyOp)
+ {
+ // Propagate copy source operand to all uses of the destination operand.
+ Operand dest = copyOp.Destination;
+ Operand source = copyOp.GetSource(0);
+
+ Span<Operation> uses = dest.GetUses(ref buffer);
+
+ foreach (Operation use in uses)
+ {
+ for (int index = 0; index < use.SourcesCount; index++)
+ {
+ if (use.GetSource(index) == dest)
+ {
+ use.SetSource(index, source);
+ }
+ }
+ }
+ }
+
+ private static void RemoveNode(BasicBlock block, Operation node)
+ {
+ // Remove the node from the block's operation list, and also remove it
+ // from the use lists of all the operands that this node uses.
+ block.Operations.Remove(node);
+
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ node.SetSource(index, default);
+ }
+
+ Debug.Assert(node.Destination == default || node.Destination.UsesCount == 0);
+
+ node.Destination = default;
+ }
+
+ private static bool IsUnused(Operation node)
+ {
+ return DestIsSingleLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node);
+ }
+
+ private static bool DestIsSingleLocalVar(Operation node)
+ {
+ return node.DestinationsCount == 1 && node.Destination.Kind == OperandKind.LocalVariable;
+ }
+
+ private static bool HasSideEffects(Operation node)
+ {
+ return node.Instruction == Instruction.Call
+ || node.Instruction == Instruction.Tailcall
+ || node.Instruction == Instruction.CompareAndSwap
+ || node.Instruction == Instruction.CompareAndSwap16
+ || node.Instruction == Instruction.CompareAndSwap8;
+ }
+
+ private static bool IsPropagableCompare(Operation operation)
+ {
+ return operation.Instruction == Instruction.Compare;
+ }
+
+ private static bool IsPropagableCopy(Operation operation)
+ {
+ if (operation.Instruction != Instruction.Copy)
+ {
+ return false;
+ }
+
+ return operation.Destination.Type == operation.GetSource(0).Type;
+ }
+ }
+} \ No newline at end of file
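
In sketch form, the PropagateCompare rewrite above behaves as follows. The Invert mapping shown here is an assumption, on the premise that Comparison.Invert() returns the logical negation of a comparison (the Equal branch consumes the compare result negated, hence the call):

using System;

enum Cmp { Equal, NotEqual, Less, GreaterOrEqual }

static class ComparePropagationSketch
{
    // Assumed behavior of Comparison.Invert(): the logical negation.
    static Cmp Invert(Cmp c) => c switch
    {
        Cmp.Equal => Cmp.NotEqual,
        Cmp.NotEqual => Cmp.Equal,
        Cmp.Less => Cmp.GreaterOrEqual,
        Cmp.GreaterOrEqual => Cmp.Less,
        _ => throw new ArgumentOutOfRangeException(nameof(c)),
    };

    static void Main()
    {
        // %x = Compare %a, %b, Less
        // BranchIf %x, 0x0, NotEqual  =>  BranchIf %a, %b, Less
        // BranchIf %x, 0x0, Equal     =>  BranchIf %a, %b, GreaterOrEqual
        Console.WriteLine(Invert(Cmp.Less)); // GreaterOrEqual
    }
}
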
diff --git a/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs b/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs
new file mode 100644
index 00000000..a439d642
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs
@@ -0,0 +1,183 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class Simplification
+ {
+ public static void RunPass(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ if (operation.GetSource(0).Relocatable ||
+ operation.GetSource(1).Relocatable)
+ {
+ break;
+ }
+
+ TryEliminateBinaryOpCommutative(operation, 0);
+ break;
+
+ case Instruction.BitwiseAnd:
+ TryEliminateBitwiseAnd(operation);
+ break;
+
+ case Instruction.BitwiseOr:
+ TryEliminateBitwiseOr(operation);
+ break;
+
+ case Instruction.BitwiseExclusiveOr:
+ TryEliminateBitwiseExclusiveOr(operation);
+ break;
+
+ case Instruction.ConditionalSelect:
+ TryEliminateConditionalSelect(operation);
+ break;
+
+ case Instruction.Divide:
+ TryEliminateBinaryOpY(operation, 1);
+ break;
+
+ case Instruction.Multiply:
+ TryEliminateBinaryOpCommutative(operation, 1);
+ break;
+
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ case Instruction.Subtract:
+ TryEliminateBinaryOpY(operation, 0);
+ break;
+ }
+ }
+
+ private static void TryEliminateBitwiseAnd(Operation operation)
+ {
+ // Try to recognize and optimize these 3 patterns (in order):
+ // x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
+ // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (IsConstEqual(x, AllOnes(x.Type)))
+ {
+ operation.TurnIntoCopy(y);
+ }
+ else if (IsConstEqual(y, AllOnes(y.Type)))
+ {
+ operation.TurnIntoCopy(x);
+ }
+ else if (IsConstEqual(x, 0) || IsConstEqual(y, 0))
+ {
+ operation.TurnIntoCopy(Const(x.Type, 0));
+ }
+ }
+
+ private static void TryEliminateBitwiseOr(Operation operation)
+ {
+ // Try to recognize and optimize these 3 patterns (in order):
+ // x | 0x00000000 == x, 0x00000000 | y == y,
+ // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (IsConstEqual(x, 0))
+ {
+ operation.TurnIntoCopy(y);
+ }
+ else if (IsConstEqual(y, 0))
+ {
+ operation.TurnIntoCopy(x);
+ }
+ else if (IsConstEqual(x, AllOnes(x.Type)) || IsConstEqual(y, AllOnes(y.Type)))
+ {
+ operation.TurnIntoCopy(Const(AllOnes(x.Type)));
+ }
+ }
+
+ private static void TryEliminateBitwiseExclusiveOr(Operation operation)
+ {
+ // Try to recognize and optimize these 2 patterns (in order):
+ // x ^ y == 0x00000000 when x == y
+ // 0x00000000 ^ y == y, x ^ 0x00000000 == x
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (x == y && x.Type.IsInteger())
+ {
+ operation.TurnIntoCopy(Const(x.Type, 0));
+ }
+ else
+ {
+ TryEliminateBinaryOpCommutative(operation, 0);
+ }
+ }
+
+ private static void TryEliminateBinaryOpY(Operation operation, ulong comparand)
+ {
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (IsConstEqual(y, comparand))
+ {
+ operation.TurnIntoCopy(x);
+ }
+ }
+
+ private static void TryEliminateBinaryOpCommutative(Operation operation, ulong comparand)
+ {
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (IsConstEqual(x, comparand))
+ {
+ operation.TurnIntoCopy(y);
+ }
+ else if (IsConstEqual(y, comparand))
+ {
+ operation.TurnIntoCopy(x);
+ }
+ }
+
+ private static void TryEliminateConditionalSelect(Operation operation)
+ {
+ Operand cond = operation.GetSource(0);
+
+ if (cond.Kind != OperandKind.Constant)
+ {
+ return;
+ }
+
+ // The condition is constant, so we can turn the operation into a copy,
+ // selecting the source based on the condition value.
+ int srcIndex = cond.Value != 0 ? 1 : 2;
+
+ Operand source = operation.GetSource(srcIndex);
+
+ operation.TurnIntoCopy(source);
+ }
+
+ private static bool IsConstEqual(Operand operand, ulong comparand)
+ {
+ if (operand.Kind != OperandKind.Constant || !operand.Type.IsInteger())
+ {
+ return false;
+ }
+
+ return operand.Value == comparand;
+ }
+
+ private static ulong AllOnes(OperandType type)
+ {
+ switch (type)
+ {
+ case OperandType.I32: return ~0U;
+ case OperandType.I64: return ~0UL;
+ }
+
+ throw new ArgumentException("Invalid operand type \"" + type + "\".");
+ }
+ }
+} \ No newline at end of file
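
The identities applied above are the usual algebraic ones. A standalone sketch (names illustrative) showing each rule on plain integers:

using System;

static class SimplificationSketch
{
    static void Main()
    {
        uint x = 0xDEADBEEF;

        Console.WriteLine((x & ~0u) == x); // True  (AND with all-ones)
        Console.WriteLine(x & 0u);         // 0     (AND with zero)
        Console.WriteLine((x | 0u) == x);  // True  (OR with zero)
        Console.WriteLine(x ^ x);          // 0     (XOR with itself)
        Console.WriteLine((x - 0u) == x);  // True  (TryEliminateBinaryOpY, comparand 0)
        Console.WriteLine((x * 1u) == x);  // True  (commutative case, comparand 1)
    }
}
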
diff --git a/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs b/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs
new file mode 100644
index 00000000..e94df159
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs
@@ -0,0 +1,83 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class TailMerge
+ {
+ public static void RunPass(in CompilerContext cctx)
+ {
+ ControlFlowGraph cfg = cctx.Cfg;
+
+ BasicBlock mergedReturn = new(cfg.Blocks.Count);
+
+ Operand returnValue;
+ Operation returnOp;
+
+ if (cctx.FuncReturnType == OperandType.None)
+ {
+ returnValue = default;
+ returnOp = Operation(Instruction.Return, default);
+ }
+ else
+ {
+ returnValue = cfg.AllocateLocal(cctx.FuncReturnType);
+ returnOp = Operation(Instruction.Return, default, returnValue);
+ }
+
+ mergedReturn.Frequency = BasicBlockFrequency.Cold;
+ mergedReturn.Operations.AddLast(returnOp);
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ Operation op = block.Operations.Last;
+
+ if (op != default && op.Instruction == Instruction.Return)
+ {
+ block.Operations.Remove(op);
+
+ if (cctx.FuncReturnType == OperandType.None)
+ {
+ PrepareMerge(block, mergedReturn);
+ }
+ else
+ {
+ Operation copyOp = Operation(Instruction.Copy, returnValue, op.GetSource(0));
+
+ PrepareMerge(block, mergedReturn).Append(copyOp);
+ }
+ }
+ }
+
+ cfg.Blocks.AddLast(mergedReturn);
+ cfg.Update();
+ }
+
+ private static BasicBlock PrepareMerge(BasicBlock from, BasicBlock to)
+ {
+ BasicBlock fromPred = from.Predecessors.Count == 1 ? from.Predecessors[0] : null;
+
+ // If the block is empty, we can try to append to the predecessor and avoid unnecessary jumps.
+ if (from.Operations.Count == 0 && fromPred != null && fromPred.SuccessorsCount == 1)
+ {
+ for (int i = 0; i < fromPred.SuccessorsCount; i++)
+ {
+ if (fromPred.GetSuccessor(i) == from)
+ {
+ fromPred.SetSuccessor(i, to);
+ }
+ }
+
+ // NOTE: `from` becomes unreachable and the call to `cfg.Update()` will remove it.
+ return fromPred;
+ }
+ else
+ {
+ from.AddSuccessor(to);
+
+ return from;
+ }
+ }
+ }
+}
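
The shape of the transformation, in the IR notation used elsewhere in these passes (a hedged sketch, not generated output): every block ending in a Return has that Return removed and is redirected to a single cold merged-return block, first copying its return value into a shared local when the function returns one.

// Before:                          // After:
//   block0: ... ; Return %a        //   block0: ... ; %ret = Copy %a ; -> blockM
//   block1: ... ; Return %b        //   block1: ... ; %ret = Copy %b ; -> blockM
//                                  //   blockM (cold): Return %ret
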
diff --git a/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs b/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs
new file mode 100644
index 00000000..53f279fb
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs
@@ -0,0 +1,57 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen
+{
+ static class PreAllocatorCommon
+ {
+ public static void Propagate(ref Span<Operation> buffer, Operand dest, Operand value)
+ {
+ ReadOnlySpan<Operation> uses = dest.GetUses(ref buffer);
+
+ foreach (Operation use in uses)
+ {
+ for (int srcIndex = 0; srcIndex < use.SourcesCount; srcIndex++)
+ {
+ Operand useSrc = use.GetSource(srcIndex);
+
+ if (useSrc == dest)
+ {
+ use.SetSource(srcIndex, value);
+ }
+ else if (useSrc.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memoryOp = useSrc.GetMemory();
+
+ Operand baseAddr = memoryOp.BaseAddress;
+ Operand index = memoryOp.Index;
+ bool changed = false;
+
+ if (baseAddr == dest)
+ {
+ baseAddr = value;
+ changed = true;
+ }
+
+ if (index == dest)
+ {
+ index = value;
+ changed = true;
+ }
+
+ if (changed)
+ {
+ use.SetSource(srcIndex, MemoryOp(
+ useSrc.Type,
+ baseAddr,
+ index,
+ memoryOp.Scale,
+ memoryOp.Displacement));
+ }
+ }
+ }
+ }
+ }
+ }
+}
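
A standalone sketch of the substitution Propagate performs (the MemoryRef record here is illustrative, not the ARMeilleure MemoryOperand API): a replaced local must be swapped out not only where it appears directly as a source, but also where it is folded into a memory operand as the base address or index.

using System;

record MemoryRef(string BaseAddress, string Index)
{
    public override string ToString() => $"[{BaseAddress} + {Index}]";
}

static class PropagateSketch
{
    // Rebuild the memory reference with `dest` replaced by `value`, mirroring
    // how Propagate reconstructs the MemoryOperand when either field matches.
    static MemoryRef Substitute(MemoryRef m, string dest, string value) =>
        new(m.BaseAddress == dest ? value : m.BaseAddress,
            m.Index == dest ? value : m.Index);

    static void Main()
    {
        var use = new MemoryRef("%t0", "%t1");
        Console.WriteLine(Substitute(use, "%t0", "%base")); // [%base + %t1]
    }
}
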
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
new file mode 100644
index 00000000..43e5c7e2
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ readonly struct AllocationResult
+ {
+ public int IntUsedRegisters { get; }
+ public int VecUsedRegisters { get; }
+ public int SpillRegionSize { get; }
+
+ public AllocationResult(
+ int intUsedRegisters,
+ int vecUsedRegisters,
+ int spillRegionSize)
+ {
+ IntUsedRegisters = intUsedRegisters;
+ VecUsedRegisters = vecUsedRegisters;
+ SpillRegionSize = spillRegionSize;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
new file mode 100644
index 00000000..587b1a02
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
@@ -0,0 +1,259 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class CopyResolver
+ {
+ private class ParallelCopy
+ {
+ private readonly struct Copy
+ {
+ public Register Dest { get; }
+ public Register Source { get; }
+
+ public OperandType Type { get; }
+
+ public Copy(Register dest, Register source, OperandType type)
+ {
+ Dest = dest;
+ Source = source;
+ Type = type;
+ }
+ }
+
+ private readonly List<Copy> _copies;
+
+ public int Count => _copies.Count;
+
+ public ParallelCopy()
+ {
+ _copies = new List<Copy>();
+ }
+
+ public void AddCopy(Register dest, Register source, OperandType type)
+ {
+ _copies.Add(new Copy(dest, source, type));
+ }
+
+ public void Sequence(List<Operation> sequence)
+ {
+ Dictionary<Register, Register> locations = new Dictionary<Register, Register>();
+ Dictionary<Register, Register> sources = new Dictionary<Register, Register>();
+
+ Dictionary<Register, OperandType> types = new Dictionary<Register, OperandType>();
+
+ Queue<Register> pendingQueue = new Queue<Register>();
+ Queue<Register> readyQueue = new Queue<Register>();
+
+ foreach (Copy copy in _copies)
+ {
+ locations[copy.Source] = copy.Source;
+ sources[copy.Dest] = copy.Source;
+ types[copy.Dest] = copy.Type;
+
+ pendingQueue.Enqueue(copy.Dest);
+ }
+
+ foreach (Copy copy in _copies)
+ {
+ // If the destination is not the source of any other copy, its old value
+ // is not needed and it can be assigned immediately.
+ if (!locations.ContainsKey(copy.Dest))
+ {
+ readyQueue.Enqueue(copy.Dest);
+ }
+ }
+
+ while (pendingQueue.TryDequeue(out Register current))
+ {
+ Register copyDest;
+ Register origSource;
+ Register copySource;
+
+ while (readyQueue.TryDequeue(out copyDest))
+ {
+ origSource = sources[copyDest];
+ copySource = locations[origSource];
+
+ OperandType type = types[copyDest];
+
+ EmitCopy(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
+
+ locations[origSource] = copyDest;
+
+ if (origSource == copySource && sources.ContainsKey(origSource))
+ {
+ readyQueue.Enqueue(origSource);
+ }
+ }
+
+ copyDest = current;
+ origSource = sources[copyDest];
+ copySource = locations[origSource];
+
+ if (copyDest != copySource)
+ {
+ OperandType type = types[copyDest];
+
+ type = type.IsInteger() ? OperandType.I64 : OperandType.V128;
+
+ EmitXorSwap(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
+
+ locations[origSource] = copyDest;
+
+ Register swapOther = copySource;
+
+ if (copyDest != locations[sources[copySource]])
+ {
+ // Find the other swap destination register.
+ // To do that, we search all the pending registers, and pick
+ // the one where the copy source register is equal to the
+ // current destination register being processed (copyDest).
+ foreach (Register pending in pendingQueue)
+ {
+ // Is this a copy of pending <- copyDest?
+ if (copyDest == locations[sources[pending]])
+ {
+ swapOther = pending;
+
+ break;
+ }
+ }
+ }
+
+ // The value that was previously at "copyDest" now lives on
+ // "copySource" thanks to the swap; now we need to update the
+ // location for the next copy that is supposed to copy the value
+ // that used to live on "copyDest".
+ locations[sources[swapOther]] = copySource;
+ }
+ }
+ }
+
+ private static void EmitCopy(List<Operation> sequence, Operand x, Operand y)
+ {
+ sequence.Add(Operation(Instruction.Copy, x, y));
+ }
+
+ private static void EmitXorSwap(List<Operation> sequence, Operand x, Operand y)
+ {
+ sequence.Add(Operation(Instruction.BitwiseExclusiveOr, x, x, y));
+ sequence.Add(Operation(Instruction.BitwiseExclusiveOr, y, y, x));
+ sequence.Add(Operation(Instruction.BitwiseExclusiveOr, x, x, y));
+ }
+ }
+
+ private Queue<Operation> _fillQueue = null;
+ private Queue<Operation> _spillQueue = null;
+ private ParallelCopy _parallelCopy = null;
+
+ public bool HasCopy { get; private set; }
+
+ public void AddSplit(LiveInterval left, LiveInterval right)
+ {
+ if (left.Local != right.Local)
+ {
+ throw new ArgumentException("Intervals of different variables are not allowed.");
+ }
+
+ OperandType type = left.Local.Type;
+
+ if (left.IsSpilled && !right.IsSpilled)
+ {
+ // Move from the stack to a register.
+ AddSplitFill(left, right, type);
+ }
+ else if (!left.IsSpilled && right.IsSpilled)
+ {
+ // Move from a register to the stack.
+ AddSplitSpill(left, right, type);
+ }
+ else if (!left.IsSpilled && !right.IsSpilled && left.Register != right.Register)
+ {
+ // Move from one register to another.
+ AddSplitCopy(left, right, type);
+ }
+ else if (left.SpillOffset != right.SpillOffset)
+ {
+ // This would be the stack-to-stack move case, but this is not supported.
+ throw new ArgumentException("Both intervals were spilled.");
+ }
+ }
+
+ private void AddSplitFill(LiveInterval left, LiveInterval right, OperandType type)
+ {
+ if (_fillQueue == null)
+ {
+ _fillQueue = new Queue<Operation>();
+ }
+
+ Operand register = GetRegister(right.Register, type);
+ Operand offset = Const(left.SpillOffset);
+
+ _fillQueue.Enqueue(Operation(Instruction.Fill, register, offset));
+
+ HasCopy = true;
+ }
+
+ private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type)
+ {
+ if (_spillQueue == null)
+ {
+ _spillQueue = new Queue<Operation>();
+ }
+
+ Operand offset = Const(right.SpillOffset);
+ Operand register = GetRegister(left.Register, type);
+
+ _spillQueue.Enqueue(Operation(Instruction.Spill, default, offset, register));
+
+ HasCopy = true;
+ }
+
+ private void AddSplitCopy(LiveInterval left, LiveInterval right, OperandType type)
+ {
+ if (_parallelCopy == null)
+ {
+ _parallelCopy = new ParallelCopy();
+ }
+
+ _parallelCopy.AddCopy(right.Register, left.Register, type);
+
+ HasCopy = true;
+ }
+
+ public Operation[] Sequence()
+ {
+ List<Operation> sequence = new List<Operation>();
+
+ if (_spillQueue != null)
+ {
+ while (_spillQueue.TryDequeue(out Operation spillOp))
+ {
+ sequence.Add(spillOp);
+ }
+ }
+
+ _parallelCopy?.Sequence(sequence);
+
+ if (_fillQueue != null)
+ {
+ while (_fillQueue.TryDequeue(out Operation fillOp))
+ {
+ sequence.Add(fillOp);
+ }
+ }
+
+ return sequence.ToArray();
+ }
+
+ private static Operand GetRegister(Register reg, OperandType type)
+ {
+ return Register(reg.Index, reg.Type, type);
+ }
+ }
+} \ No newline at end of file
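
The XOR-swap fallback in ParallelCopy.Sequence deserves a note: when copies form a cycle (e.g. r0 <- r1 and r1 <- r0) there is no free register to break it, so the resolver swaps the two registers in place with three XORs, widening the type to I64 or V128 so the whole register is exchanged. A standalone sketch of the three instructions EmitXorSwap emits, applied to values:

using System;

static class XorSwapSketch
{
    static void Main()
    {
        ulong x = 0x1111, y = 0x2222;

        x ^= y; // x = x0 ^ y0
        y ^= x; // y = y0 ^ (x0 ^ y0) = x0
        x ^= y; // x = (x0 ^ y0) ^ x0 = y0

        Console.WriteLine($"{x:X}, {y:X}"); // 2222, 1111
    }
}
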
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
new file mode 100644
index 00000000..25952c77
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
@@ -0,0 +1,454 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class HybridAllocator : IRegisterAllocator
+ {
+ private readonly struct BlockInfo
+ {
+ public bool HasCall { get; }
+
+ public int IntFixedRegisters { get; }
+ public int VecFixedRegisters { get; }
+
+ public BlockInfo(bool hasCall, int intFixedRegisters, int vecFixedRegisters)
+ {
+ HasCall = hasCall;
+ IntFixedRegisters = intFixedRegisters;
+ VecFixedRegisters = vecFixedRegisters;
+ }
+ }
+
+ private struct LocalInfo
+ {
+ public int Uses { get; set; }
+ public int UsesAllocated { get; set; }
+ public int Sequence { get; set; }
+ public Operand Temp { get; set; }
+ public Operand Register { get; set; }
+ public Operand SpillOffset { get; set; }
+ public OperandType Type { get; }
+
+ private int _first;
+ private int _last;
+
+ public bool IsBlockLocal => _first == _last;
+
+ public LocalInfo(OperandType type, int uses, int blkIndex)
+ {
+ Uses = uses;
+ Type = type;
+
+ UsesAllocated = 0;
+ Sequence = 0;
+ Temp = default;
+ Register = default;
+ SpillOffset = default;
+
+ _first = -1;
+ _last = -1;
+
+ SetBlockIndex(blkIndex);
+ }
+
+ public void SetBlockIndex(int blkIndex)
+ {
+ if (_first == -1 || blkIndex < _first)
+ {
+ _first = blkIndex;
+ }
+
+ if (_last == -1 || blkIndex > _last)
+ {
+ _last = blkIndex;
+ }
+ }
+ }
+
+ private const int MaxIROperands = 4;
+ // The "visited" state is stored in the MSB of the local's value.
+ private const ulong VisitedMask = 1ul << 63;
+
+ private BlockInfo[] _blockInfo;
+ private LocalInfo[] _localInfo;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static bool IsVisited(Operand local)
+ {
+ Debug.Assert(local.Kind == OperandKind.LocalVariable);
+
+ return (local.GetValueUnsafe() & VisitedMask) != 0;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void SetVisited(Operand local)
+ {
+ Debug.Assert(local.Kind == OperandKind.LocalVariable);
+
+ local.GetValueUnsafe() |= VisitedMask;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private ref LocalInfo GetLocalInfo(Operand local)
+ {
+ Debug.Assert(local.Kind == OperandKind.LocalVariable);
+ Debug.Assert(IsVisited(local), "Local variable not visited. Used before defined?");
+
+ return ref _localInfo[(uint)local.GetValueUnsafe() - 1];
+ }
+
+ public AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks)
+ {
+ int intUsedRegisters = 0;
+ int vecUsedRegisters = 0;
+
+ int intFreeRegisters = regMasks.IntAvailableRegisters;
+ int vecFreeRegisters = regMasks.VecAvailableRegisters;
+
+ _blockInfo = new BlockInfo[cfg.Blocks.Count];
+ _localInfo = new LocalInfo[cfg.Blocks.Count * 3];
+
+ int localInfoCount = 0;
+
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ int intFixedRegisters = 0;
+ int vecFixedRegisters = 0;
+
+ bool hasCall = false;
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ if (node.Instruction == Instruction.Call)
+ {
+ hasCall = true;
+ }
+
+ foreach (Operand source in node.SourcesUnsafe)
+ {
+ if (source.Kind == OperandKind.LocalVariable)
+ {
+ GetLocalInfo(source).SetBlockIndex(block.Index);
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress != default)
+ {
+ GetLocalInfo(memOp.BaseAddress).SetBlockIndex(block.Index);
+ }
+
+ if (memOp.Index != default)
+ {
+ GetLocalInfo(memOp.Index).SetBlockIndex(block.Index);
+ }
+ }
+ }
+
+ foreach (Operand dest in node.DestinationsUnsafe)
+ {
+ if (dest.Kind == OperandKind.LocalVariable)
+ {
+ if (IsVisited(dest))
+ {
+ GetLocalInfo(dest).SetBlockIndex(block.Index);
+ }
+ else
+ {
+ dest.NumberLocal(++localInfoCount);
+
+ if (localInfoCount > _localInfo.Length)
+ {
+ Array.Resize(ref _localInfo, localInfoCount * 2);
+ }
+
+ SetVisited(dest);
+ GetLocalInfo(dest) = new LocalInfo(dest.Type, UsesCount(dest), block.Index);
+ }
+ }
+ else if (dest.Kind == OperandKind.Register)
+ {
+ if (dest.Type.IsInteger())
+ {
+ intFixedRegisters |= 1 << dest.GetRegister().Index;
+ }
+ else
+ {
+ vecFixedRegisters |= 1 << dest.GetRegister().Index;
+ }
+ }
+ }
+ }
+
+ _blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters);
+ }
+
+ int sequence = 0;
+
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ ref BlockInfo blkInfo = ref _blockInfo[block.Index];
+
+ int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters;
+ int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters;
+
+ int intCallerSavedRegisters = blkInfo.HasCall ? regMasks.IntCallerSavedRegisters : 0;
+ int vecCallerSavedRegisters = blkInfo.HasCall ? regMasks.VecCallerSavedRegisters : 0;
+
+ int intSpillTempRegisters = SelectSpillTemps(
+ intCallerSavedRegisters & ~blkInfo.IntFixedRegisters,
+ intLocalFreeRegisters);
+ int vecSpillTempRegisters = SelectSpillTemps(
+ vecCallerSavedRegisters & ~blkInfo.VecFixedRegisters,
+ vecLocalFreeRegisters);
+
+ intLocalFreeRegisters &= ~(intSpillTempRegisters | intCallerSavedRegisters);
+ vecLocalFreeRegisters &= ~(vecSpillTempRegisters | vecCallerSavedRegisters);
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ int intLocalUse = 0;
+ int vecLocalUse = 0;
+
+ Operand AllocateRegister(Operand local)
+ {
+ ref LocalInfo info = ref GetLocalInfo(local);
+
+ info.UsesAllocated++;
+
+ Debug.Assert(info.UsesAllocated <= info.Uses);
+
+ if (info.Register != default)
+ {
+ if (info.UsesAllocated == info.Uses)
+ {
+ Register reg = info.Register.GetRegister();
+
+ if (local.Type.IsInteger())
+ {
+ intLocalFreeRegisters |= 1 << reg.Index;
+ }
+ else
+ {
+ vecLocalFreeRegisters |= 1 << reg.Index;
+ }
+ }
+
+ return info.Register;
+ }
+ else
+ {
+ Operand temp = info.Temp;
+
+ if (temp == default || info.Sequence != sequence)
+ {
+ temp = local.Type.IsInteger()
+ ? GetSpillTemp(local, intSpillTempRegisters, ref intLocalUse)
+ : GetSpillTemp(local, vecSpillTempRegisters, ref vecLocalUse);
+
+ info.Sequence = sequence;
+ info.Temp = temp;
+ }
+
+ Operation fillOp = Operation(Instruction.Fill, temp, info.SpillOffset);
+
+ block.Operations.AddBefore(node, fillOp);
+
+ return temp;
+ }
+ }
+
+ bool folded = false;
+
+ // If the operation is a copy of a local that lives on the stack, we turn the copy itself into
+ // a fill, instead of inserting a fill before it.
+ if (node.Instruction == Instruction.Copy)
+ {
+ Operand source = node.GetSource(0);
+
+ if (source.Kind == OperandKind.LocalVariable)
+ {
+ ref LocalInfo info = ref GetLocalInfo(source);
+
+ if (info.Register == default)
+ {
+ Operation fillOp = Operation(Instruction.Fill, node.Destination, info.SpillOffset);
+
+ block.Operations.AddBefore(node, fillOp);
+ block.Operations.Remove(node);
+
+ node = fillOp;
+
+ folded = true;
+ }
+ }
+ }
+
+ if (!folded)
+ {
+ foreach (ref Operand source in node.SourcesUnsafe)
+ {
+ if (source.Kind == OperandKind.LocalVariable)
+ {
+ source = AllocateRegister(source);
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress != default)
+ {
+ memOp.BaseAddress = AllocateRegister(memOp.BaseAddress);
+ }
+
+ if (memOp.Index != default)
+ {
+ memOp.Index = AllocateRegister(memOp.Index);
+ }
+ }
+ }
+ }
+
+ int intLocalAsg = 0;
+ int vecLocalAsg = 0;
+
+ foreach (ref Operand dest in node.DestinationsUnsafe)
+ {
+ if (dest.Kind != OperandKind.LocalVariable)
+ {
+ continue;
+ }
+
+ ref LocalInfo info = ref GetLocalInfo(dest);
+
+ if (info.UsesAllocated == 0)
+ {
+ int mask = dest.Type.IsInteger()
+ ? intLocalFreeRegisters
+ : vecLocalFreeRegisters;
+
+ if (info.IsBlockLocal && mask != 0)
+ {
+ int selectedReg = BitOperations.TrailingZeroCount(mask);
+
+ info.Register = Register(selectedReg, info.Type.ToRegisterType(), info.Type);
+
+ if (dest.Type.IsInteger())
+ {
+ intLocalFreeRegisters &= ~(1 << selectedReg);
+ intUsedRegisters |= 1 << selectedReg;
+ }
+ else
+ {
+ vecLocalFreeRegisters &= ~(1 << selectedReg);
+ vecUsedRegisters |= 1 << selectedReg;
+ }
+ }
+ else
+ {
+ info.Register = default;
+ info.SpillOffset = Const(stackAlloc.Allocate(dest.Type.GetSizeInBytes()));
+ }
+ }
+
+ info.UsesAllocated++;
+
+ Debug.Assert(info.UsesAllocated <= info.Uses);
+
+ if (info.Register != default)
+ {
+ dest = info.Register;
+ }
+ else
+ {
+ Operand temp = info.Temp;
+
+ if (temp == default || info.Sequence != sequence)
+ {
+ temp = dest.Type.IsInteger()
+ ? GetSpillTemp(dest, intSpillTempRegisters, ref intLocalAsg)
+ : GetSpillTemp(dest, vecSpillTempRegisters, ref vecLocalAsg);
+
+ info.Sequence = sequence;
+ info.Temp = temp;
+ }
+
+ dest = temp;
+
+ Operation spillOp = Operation(Instruction.Spill, default, info.SpillOffset, temp);
+
+ block.Operations.AddAfter(node, spillOp);
+
+ node = spillOp;
+ }
+ }
+
+ sequence++;
+
+ intUsedRegisters |= intLocalAsg | intLocalUse;
+ vecUsedRegisters |= vecLocalAsg | vecLocalUse;
+ }
+ }
+
+ return new AllocationResult(intUsedRegisters, vecUsedRegisters, stackAlloc.TotalSize);
+ }
+
+ private static int SelectSpillTemps(int mask0, int mask1)
+ {
+ int selection = 0;
+ int count = 0;
+
+ while (count < MaxIROperands && mask0 != 0)
+ {
+ int mask = mask0 & -mask0;
+
+ selection |= mask;
+
+ mask0 &= ~mask;
+
+ count++;
+ }
+
+ while (count < MaxIROperands && mask1 != 0)
+ {
+ int mask = mask1 & -mask1;
+
+ selection |= mask;
+
+ mask1 &= ~mask;
+
+ count++;
+ }
+
+ Debug.Assert(count == MaxIROperands, "Not enough registers for spill temps.");
+
+ return selection;
+ }
+
+ private static Operand GetSpillTemp(Operand local, int freeMask, ref int useMask)
+ {
+ int selectedReg = BitOperations.TrailingZeroCount(freeMask & ~useMask);
+
+ useMask |= 1 << selectedReg;
+
+ return Register(selectedReg, local.Type.ToRegisterType(), local.Type);
+ }
+
+ private static int UsesCount(Operand local)
+ {
+ return local.AssignmentsCount + local.UsesCount;
+ }
+ }
+} \ No newline at end of file
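
Two bit tricks above are worth spelling out: SelectSpillTemps uses `mask & -mask` to isolate the lowest set bit of a register mask, and GetSpillTemp uses TrailingZeroCount to turn that bit into a register index. A standalone sketch:

using System;
using System.Numerics;

static class SpillTempSketch
{
    static void Main()
    {
        int mask = 0b_1011_0100; // free registers: r2, r4, r5, r7

        int lowest = mask & -mask; // isolates the lowest set bit (r2)
        Console.WriteLine(Convert.ToString(lowest, 2));           // 100
        Console.WriteLine(BitOperations.TrailingZeroCount(mask)); // 2

        mask &= ~lowest; // claim r2, leaving r4, r5, r7
        Console.WriteLine(BitOperations.TrailingZeroCount(mask)); // 4
    }
}
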
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
new file mode 100644
index 00000000..8f236c25
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
@@ -0,0 +1,12 @@
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ interface IRegisterAllocator
+ {
+ AllocationResult RunPass(
+ ControlFlowGraph cfg,
+ StackAllocator stackAlloc,
+ RegisterMasks regMasks);
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
new file mode 100644
index 00000000..d80157af
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
@@ -0,0 +1,1101 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Numerics;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Based on:
+ // "Linear Scan Register Allocation for the Java(tm) HotSpot Client Compiler".
+ // http://www.christianwimmer.at/Publications/Wimmer04a/Wimmer04a.pdf
+ class LinearScanAllocator : IRegisterAllocator
+ {
+ private const int InstructionGap = 2;
+ private const int InstructionGapMask = InstructionGap - 1;
+
+ private HashSet<int> _blockEdges;
+ private LiveRange[] _blockRanges;
+ private BitMap[] _blockLiveIn;
+
+ private List<LiveInterval> _intervals;
+ private LiveInterval[] _parentIntervals;
+
+ private List<(IntrusiveList<Operation>, Operation)> _operationNodes;
+ private int _operationsCount;
+
+ private class AllocationContext
+ {
+ public RegisterMasks Masks { get; }
+
+ public StackAllocator StackAlloc { get; }
+
+ public BitMap Active { get; }
+ public BitMap Inactive { get; }
+
+ public int IntUsedRegisters { get; set; }
+ public int VecUsedRegisters { get; set; }
+
+ private readonly int[] _intFreePositions;
+ private readonly int[] _vecFreePositions;
+ private readonly int _intFreePositionsCount;
+ private readonly int _vecFreePositionsCount;
+
+ public AllocationContext(StackAllocator stackAlloc, RegisterMasks masks, int intervalsCount)
+ {
+ StackAlloc = stackAlloc;
+ Masks = masks;
+
+ Active = new BitMap(Allocators.Default, intervalsCount);
+ Inactive = new BitMap(Allocators.Default, intervalsCount);
+
+ PopulateFreePositions(RegisterType.Integer, out _intFreePositions, out _intFreePositionsCount);
+ PopulateFreePositions(RegisterType.Vector, out _vecFreePositions, out _vecFreePositionsCount);
+
+ void PopulateFreePositions(RegisterType type, out int[] positions, out int count)
+ {
+ positions = new int[masks.RegistersCount];
+ count = BitOperations.PopCount((uint)masks.GetAvailableRegisters(type));
+
+ int mask = masks.GetAvailableRegisters(type);
+
+ for (int i = 0; i < positions.Length; i++)
+ {
+ if ((mask & (1 << i)) != 0)
+ {
+ positions[i] = int.MaxValue;
+ }
+ }
+ }
+ }
+
+ public void GetFreePositions(RegisterType type, in Span<int> positions, out int count)
+ {
+ if (type == RegisterType.Integer)
+ {
+ _intFreePositions.CopyTo(positions);
+
+ count = _intFreePositionsCount;
+ }
+ else
+ {
+ Debug.Assert(type == RegisterType.Vector);
+
+ _vecFreePositions.CopyTo(positions);
+
+ count = _vecFreePositionsCount;
+ }
+ }
+
+ public void MoveActiveToInactive(int bit)
+ {
+ Move(Active, Inactive, bit);
+ }
+
+ public void MoveInactiveToActive(int bit)
+ {
+ Move(Inactive, Active, bit);
+ }
+
+ private static void Move(BitMap source, BitMap dest, int bit)
+ {
+ source.Clear(bit);
+
+ dest.Set(bit);
+ }
+ }
+
+ public AllocationResult RunPass(
+ ControlFlowGraph cfg,
+ StackAllocator stackAlloc,
+ RegisterMasks regMasks)
+ {
+ NumberLocals(cfg, regMasks.RegistersCount);
+
+ var context = new AllocationContext(stackAlloc, regMasks, _intervals.Count);
+
+ BuildIntervals(cfg, context);
+
+ for (int index = 0; index < _intervals.Count; index++)
+ {
+ LiveInterval current = _intervals[index];
+
+ if (current.IsEmpty)
+ {
+ continue;
+ }
+
+ if (current.IsFixed)
+ {
+ context.Active.Set(index);
+
+ if (current.IsFixedAndUsed)
+ {
+ if (current.Register.Type == RegisterType.Integer)
+ {
+ context.IntUsedRegisters |= 1 << current.Register.Index;
+ }
+ else /* if (current.Register.Type == RegisterType.Vector) */
+ {
+ context.VecUsedRegisters |= 1 << current.Register.Index;
+ }
+ }
+
+ continue;
+ }
+
+ AllocateInterval(context, current, index, regMasks.RegistersCount);
+ }
+
+ for (int index = regMasks.RegistersCount * 2; index < _intervals.Count; index++)
+ {
+ if (!_intervals[index].IsSpilled)
+ {
+ ReplaceLocalWithRegister(_intervals[index]);
+ }
+ }
+
+ InsertSplitCopies();
+ InsertSplitCopiesAtEdges(cfg);
+
+ return new AllocationResult(context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize);
+ }
+
+ private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
+ {
+ // Check active intervals that already ended.
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ interval.Forward(current.GetStart());
+
+ if (interval.GetEnd() < current.GetStart())
+ {
+ context.Active.Clear(iIndex);
+ }
+ else if (!interval.Overlaps(current.GetStart()))
+ {
+ context.MoveActiveToInactive(iIndex);
+ }
+ }
+
+ // Check inactive intervals that already ended or were reactivated.
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ interval.Forward(current.GetStart());
+
+ if (interval.GetEnd() < current.GetStart())
+ {
+ context.Inactive.Clear(iIndex);
+ }
+ else if (interval.Overlaps(current.GetStart()))
+ {
+ context.MoveInactiveToActive(iIndex);
+ }
+ }
+
+ if (!TryAllocateRegWithoutSpill(context, current, cIndex, registersCount))
+ {
+ AllocateRegWithSpill(context, current, cIndex, registersCount);
+ }
+ }
+
+ private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
+ {
+ RegisterType regType = current.Local.Type.ToRegisterType();
+
+ Span<int> freePositions = stackalloc int[registersCount];
+
+ context.GetFreePositions(regType, freePositions, out int freePositionsCount);
+
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+ Register reg = interval.Register;
+
+ if (reg.Type == regType)
+ {
+ freePositions[reg.Index] = 0;
+ freePositionsCount--;
+ }
+ }
+
+ // If all registers are already active, return early. No point in inspecting the inactive set to look for
+ // holes.
+ if (freePositionsCount == 0)
+ {
+ return false;
+ }
+
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+ Register reg = interval.Register;
+
+ ref int freePosition = ref freePositions[reg.Index];
+
+ if (reg.Type == regType && freePosition != 0)
+ {
+ int overlapPosition = interval.GetOverlapPosition(current);
+
+ if (overlapPosition != LiveInterval.NotFound && freePosition > overlapPosition)
+ {
+ freePosition = overlapPosition;
+ }
+ }
+ }
+
+ int selectedReg = GetHighestValueIndex(freePositions);
+ int selectedNextUse = freePositions[selectedReg];
+
+ // Intervals start and end at odd positions, unless they span an entire
+ // block, in which case they will have ranges at an even position.
+ // When an interval is loaded from the stack to a register, we can only
+ // do the split at an odd position, because otherwise the split interval
+ // that is inserted on the list to be processed may clobber a register
+ // used by the instruction at the same position as the split.
+ // The problem only happens when an interval ends exactly at this instruction,
+ // because otherwise they would interfere, and the register wouldn't be selected.
+ // When the interval is aligned and the above happens, there's no problem, as
+ // the instruction that actually contains the last use is the one
+ // before that position.
+ selectedNextUse &= ~InstructionGapMask;
+
+ if (selectedNextUse <= current.GetStart())
+ {
+ return false;
+ }
+ else if (selectedNextUse < current.GetEnd())
+ {
+ LiveInterval splitChild = current.Split(selectedNextUse);
+
+ if (splitChild.UsesCount != 0)
+ {
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ InsertInterval(splitChild, registersCount);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+ }
+
+ current.Register = new Register(selectedReg, regType);
+
+ if (regType == RegisterType.Integer)
+ {
+ context.IntUsedRegisters |= 1 << selectedReg;
+ }
+ else /* if (regType == RegisterType.Vector) */
+ {
+ context.VecUsedRegisters |= 1 << selectedReg;
+ }
+
+ context.Active.Set(cIndex);
+
+ return true;
+ }
+
+ private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
+ {
+ RegisterType regType = current.Local.Type.ToRegisterType();
+
+ Span<int> usePositions = stackalloc int[registersCount];
+ Span<int> blockedPositions = stackalloc int[registersCount];
+
+ context.GetFreePositions(regType, usePositions, out _);
+ context.GetFreePositions(regType, blockedPositions, out _);
+
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+ Register reg = interval.Register;
+
+ if (reg.Type == regType)
+ {
+ ref int usePosition = ref usePositions[reg.Index];
+ ref int blockedPosition = ref blockedPositions[reg.Index];
+
+ if (interval.IsFixed)
+ {
+ usePosition = 0;
+ blockedPosition = 0;
+ }
+ else
+ {
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ if (nextUse != LiveInterval.NotFound && usePosition > nextUse)
+ {
+ usePosition = nextUse;
+ }
+ }
+ }
+ }
+
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+ Register reg = interval.Register;
+
+ if (reg.Type == regType)
+ {
+ ref int usePosition = ref usePositions[reg.Index];
+ ref int blockedPosition = ref blockedPositions[reg.Index];
+
+ if (interval.IsFixed)
+ {
+ int overlapPosition = interval.GetOverlapPosition(current);
+
+ if (overlapPosition != LiveInterval.NotFound)
+ {
+ blockedPosition = Math.Min(blockedPosition, overlapPosition);
+ usePosition = Math.Min(usePosition, overlapPosition);
+ }
+ }
+ else if (interval.Overlaps(current))
+ {
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ if (nextUse != LiveInterval.NotFound && usePosition > nextUse)
+ {
+ usePosition = nextUse;
+ }
+ }
+ }
+ }
+
+ int selectedReg = GetHighestValueIndex(usePositions);
+ int currentFirstUse = current.FirstUse();
+
+ Debug.Assert(currentFirstUse >= 0, "Current interval has no uses.");
+
+ if (usePositions[selectedReg] < currentFirstUse)
+ {
+ // All intervals on inactive and active are being used before current,
+ // so spill the current interval.
+ Debug.Assert(currentFirstUse > current.GetStart(), "Trying to spill an interval currently being used.");
+
+ LiveInterval splitChild = current.Split(currentFirstUse);
+
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ InsertInterval(splitChild, registersCount);
+
+ Spill(context, current);
+ }
+ else if (blockedPositions[selectedReg] > current.GetEnd())
+ {
+ // Spill made the register available for the entire current lifetime,
+ // so we only need to split the intervals using the selected register.
+ current.Register = new Register(selectedReg, regType);
+
+ SplitAndSpillOverlappingIntervals(context, current, registersCount);
+
+ context.Active.Set(cIndex);
+ }
+ else
+ {
+ // There are conflicts even after spill due to the use of fixed registers
+ // that can't be spilled, so we need to also split current at the point of
+ // the first fixed register use.
+ current.Register = new Register(selectedReg, regType);
+
+ int splitPosition = blockedPositions[selectedReg] & ~InstructionGapMask;
+
+ Debug.Assert(splitPosition > current.GetStart(), "Trying to split an interval at an invalid position.");
+
+ LiveInterval splitChild = current.Split(splitPosition);
+
+ if (splitChild.UsesCount != 0)
+ {
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ InsertInterval(splitChild, registersCount);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+
+ SplitAndSpillOverlappingIntervals(context, current, registersCount);
+
+ context.Active.Set(cIndex);
+ }
+ }
+
+ private static int GetHighestValueIndex(Span<int> span)
+ {
+ int highest = int.MinValue;
+
+ int selected = 0;
+
+ for (int index = 0; index < span.Length; index++)
+ {
+ int current = span[index];
+
+ if (highest < current)
+ {
+ highest = current;
+ selected = index;
+
+ if (current == int.MaxValue)
+ {
+ break;
+ }
+ }
+ }
+
+ return selected;
+ }
+
+ private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current, int registersCount)
+ {
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (!interval.IsFixed && interval.Register == current.Register)
+ {
+ SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
+
+ context.Active.Clear(iIndex);
+ }
+ }
+
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current))
+ {
+ SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
+
+ context.Inactive.Clear(iIndex);
+ }
+ }
+ }
+
+ private void SplitAndSpillOverlappingInterval(
+ AllocationContext context,
+ LiveInterval current,
+ LiveInterval interval,
+ int registersCount)
+ {
+ // If there's a next use after the start of the current interval,
+ // we need to split the spilled interval twice, and re-insert it
+ // on the "pending" list to ensure that it will get a new register
+ // on that use position.
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ LiveInterval splitChild;
+
+ if (interval.GetStart() < current.GetStart())
+ {
+ splitChild = interval.Split(current.GetStart());
+ }
+ else
+ {
+ splitChild = interval;
+ }
+
+ if (nextUse != -1)
+ {
+ Debug.Assert(nextUse > current.GetStart(), "Trying to spill an interval currently being used.");
+
+ if (nextUse > splitChild.GetStart())
+ {
+ LiveInterval right = splitChild.Split(nextUse);
+
+ Spill(context, splitChild);
+
+ splitChild = right;
+ }
+
+ InsertInterval(splitChild, registersCount);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+ }
+
+ private void InsertInterval(LiveInterval interval, int registersCount)
+ {
+ Debug.Assert(interval.UsesCount != 0, "Trying to insert an interval without uses.");
+ Debug.Assert(!interval.IsEmpty, "Trying to insert an empty interval.");
+ Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval.");
+
+ int startIndex = registersCount * 2;
+
+ int insertIndex = _intervals.BinarySearch(startIndex, _intervals.Count - startIndex, interval, null);
+
+ if (insertIndex < 0)
+ {
+ insertIndex = ~insertIndex;
+ }
+
+ _intervals.Insert(insertIndex, interval);
+ }
+
+ private void Spill(AllocationContext context, LiveInterval interval)
+ {
+ Debug.Assert(!interval.IsFixed, "Trying to spill a fixed interval.");
+ Debug.Assert(interval.UsesCount == 0, "Trying to spill an interval with uses.");
+
+ // We first check if any of the siblings were spilled; if so, we can reuse
+ // the stack offset. Otherwise, we allocate new space on the stack.
+ // This prevents stack-to-stack copies from being necessary for a split interval.
+ if (!interval.TrySpillWithSiblingOffset())
+ {
+ interval.Spill(context.StackAlloc.Allocate(interval.Local.Type));
+ }
+ }
+
+ private void InsertSplitCopies()
+ {
+ Dictionary<int, CopyResolver> copyResolvers = new Dictionary<int, CopyResolver>();
+
+ CopyResolver GetCopyResolver(int position)
+ {
+ if (!copyResolvers.TryGetValue(position, out CopyResolver copyResolver))
+ {
+ copyResolver = new CopyResolver();
+
+ copyResolvers.Add(position, copyResolver);
+ }
+
+ return copyResolver;
+ }
+
+ foreach (LiveInterval interval in _intervals.Where(x => x.IsSplit))
+ {
+ LiveInterval previous = interval;
+
+ foreach (LiveInterval splitChild in interval.SplitChildren())
+ {
+ int splitPosition = splitChild.GetStart();
+
+ if (!_blockEdges.Contains(splitPosition) && previous.GetEnd() == splitPosition)
+ {
+ GetCopyResolver(splitPosition).AddSplit(previous, splitChild);
+ }
+
+ previous = splitChild;
+ }
+ }
+
+ foreach (KeyValuePair<int, CopyResolver> kv in copyResolvers)
+ {
+ CopyResolver copyResolver = kv.Value;
+
+ if (!copyResolver.HasCopy)
+ {
+ continue;
+ }
+
+ int splitPosition = kv.Key;
+
+ (IntrusiveList<Operation> nodes, Operation node) = GetOperationNode(splitPosition);
+
+ Operation[] sequence = copyResolver.Sequence();
+
+ nodes.AddBefore(node, sequence[0]);
+
+ node = sequence[0];
+
+ for (int index = 1; index < sequence.Length; index++)
+ {
+ nodes.AddAfter(node, sequence[index]);
+
+ node = sequence[index];
+ }
+ }
+ }
+
+ private void InsertSplitCopiesAtEdges(ControlFlowGraph cfg)
+ {
+ int blocksCount = cfg.Blocks.Count;
+
+ bool IsSplitEdgeBlock(BasicBlock block)
+ {
+ return block.Index >= blocksCount;
+ }
+
+ // Reset iterators to beginning because GetSplitChild depends on the state of the iterator.
+ foreach (LiveInterval interval in _intervals)
+ {
+ interval.Reset();
+ }
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ if (IsSplitEdgeBlock(block))
+ {
+ continue;
+ }
+
+ bool hasSingleOrNoSuccessor = block.SuccessorsCount <= 1;
+
+ for (int i = 0; i < block.SuccessorsCount; i++)
+ {
+ BasicBlock successor = block.GetSuccessor(i);
+
+ int succIndex = successor.Index;
+
+ // If the current node is a split node, then the actual successor node
+ // (the successor before the split) should be right after it.
+ if (IsSplitEdgeBlock(successor))
+ {
+ succIndex = successor.GetSuccessor(0).Index;
+ }
+
+ CopyResolver copyResolver = null;
+
+ foreach (int iIndex in _blockLiveIn[succIndex])
+ {
+ LiveInterval interval = _parentIntervals[iIndex];
+
+ if (!interval.IsSplit)
+ {
+ continue;
+ }
+
+ int lEnd = _blockRanges[block.Index].End - 1;
+ int rStart = _blockRanges[succIndex].Start;
+
+ LiveInterval left = interval.GetSplitChild(lEnd);
+ LiveInterval right = interval.GetSplitChild(rStart);
+
+ if (left != default && right != default && left != right)
+ {
+ if (copyResolver == null)
+ {
+ copyResolver = new CopyResolver();
+ }
+
+ copyResolver.AddSplit(left, right);
+ }
+ }
+
+ if (copyResolver == null || !copyResolver.HasCopy)
+ {
+ continue;
+ }
+
+ Operation[] sequence = copyResolver.Sequence();
+
+ if (hasSingleOrNoSuccessor)
+ {
+ foreach (Operation operation in sequence)
+ {
+ block.Append(operation);
+ }
+ }
+ else if (successor.Predecessors.Count == 1)
+ {
+ successor.Operations.AddFirst(sequence[0]);
+
+ Operation prependNode = sequence[0];
+
+ for (int index = 1; index < sequence.Length; index++)
+ {
+ Operation operation = sequence[index];
+
+ successor.Operations.AddAfter(prependNode, operation);
+
+ prependNode = operation;
+ }
+ }
+ else
+ {
+ // Split the critical edge.
+ BasicBlock splitBlock = cfg.SplitEdge(block, successor);
+
+ foreach (Operation operation in sequence)
+ {
+ splitBlock.Append(operation);
+ }
+ }
+ }
+ }
+ }
+
+ private void ReplaceLocalWithRegister(LiveInterval current)
+ {
+ Operand register = GetRegister(current);
+
+ foreach (int usePosition in current.UsePositions())
+ {
+ (_, Operation operation) = GetOperationNode(usePosition);
+
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ Operand source = operation.GetSource(index);
+
+ if (source == current.Local)
+ {
+ operation.SetSource(index, register);
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress == current.Local)
+ {
+ memOp.BaseAddress = register;
+ }
+
+ if (memOp.Index == current.Local)
+ {
+ memOp.Index = register;
+ }
+ }
+ }
+
+ for (int index = 0; index < operation.DestinationsCount; index++)
+ {
+ Operand dest = operation.GetDestination(index);
+
+ if (dest == current.Local)
+ {
+ operation.SetDestination(index, register);
+ }
+ }
+ }
+ }
+
+ private static Operand GetRegister(LiveInterval interval)
+ {
+ Debug.Assert(!interval.IsSpilled, "Spilled intervals are not allowed.");
+
+ return Operand.Factory.Register(
+ interval.Register.Index,
+ interval.Register.Type,
+ interval.Local.Type);
+ }
+
+ private (IntrusiveList<Operation>, Operation) GetOperationNode(int position)
+ {
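+ // Positions advance in steps of InstructionGap, so dividing by the gap recovers
+ // the index of the instruction in the flattened operation list.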
+ return _operationNodes[position / InstructionGap];
+ }
+
+ private void NumberLocals(ControlFlowGraph cfg, int registersCount)
+ {
+ _operationNodes = new List<(IntrusiveList<Operation>, Operation)>();
+ _intervals = new List<LiveInterval>();
+
+ for (int index = 0; index < registersCount; index++)
+ {
+ _intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer)));
+ _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector)));
+ }
+
+ // The "visited" state is stored in the MSB of the local's value.
+ const ulong VisitedMask = 1ul << 63;
+
+ bool IsVisited(Operand local)
+ {
+ return (local.GetValueUnsafe() & VisitedMask) != 0;
+ }
+
+ void SetVisited(Operand local)
+ {
+ local.GetValueUnsafe() |= VisitedMask;
+ }
+
+ _operationsCount = 0;
+
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ _operationNodes.Add((block.Operations, node));
+
+ for (int i = 0; i < node.DestinationsCount; i++)
+ {
+ Operand dest = node.GetDestination(i);
+
+ if (dest.Kind == OperandKind.LocalVariable && !IsVisited(dest))
+ {
+ dest.NumberLocal(_intervals.Count);
+
+ _intervals.Add(new LiveInterval(dest));
+
+ SetVisited(dest);
+ }
+ }
+ }
+
+ _operationsCount += block.Operations.Count * InstructionGap;
+
+ if (block.Operations.Count == 0)
+ {
+ // Pretend we have a dummy instruction on the empty block.
+ _operationNodes.Add((default, default));
+
+ _operationsCount += InstructionGap;
+ }
+ }
+
+ _parentIntervals = _intervals.ToArray();
+ }
+
+ private void BuildIntervals(ControlFlowGraph cfg, AllocationContext context)
+ {
+ _blockRanges = new LiveRange[cfg.Blocks.Count];
+
+ int mapSize = _intervals.Count;
+
+ BitMap[] blkLiveGen = new BitMap[cfg.Blocks.Count];
+ BitMap[] blkLiveKill = new BitMap[cfg.Blocks.Count];
+
+ // Compute local live sets.
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ BitMap liveGen = new BitMap(Allocators.Default, mapSize);
+ BitMap liveKill = new BitMap(Allocators.Default, mapSize);
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ for (int i = 0; i < node.SourcesCount; i++)
+ {
+ VisitSource(node.GetSource(i));
+ }
+
+ for (int i = 0; i < node.DestinationsCount; i++)
+ {
+ VisitDestination(node.GetDestination(i));
+ }
+
+ void VisitSource(Operand source)
+ {
+ if (IsLocalOrRegister(source.Kind))
+ {
+ int id = GetOperandId(source);
+
+ if (!liveKill.IsSet(id))
+ {
+ liveGen.Set(id);
+ }
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress != default)
+ {
+ VisitSource(memOp.BaseAddress);
+ }
+
+ if (memOp.Index != default)
+ {
+ VisitSource(memOp.Index);
+ }
+ }
+ }
+
+ void VisitDestination(Operand dest)
+ {
+ liveKill.Set(GetOperandId(dest));
+ }
+ }
+
+ blkLiveGen [block.Index] = liveGen;
+ blkLiveKill[block.Index] = liveKill;
+ }
+
+ // Compute global live sets.
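+ // Iterate to a fixed point over the standard backward dataflow equations:
+ //   liveOut(b) = union of liveIn(s) over every successor s of b
+ //   liveIn(b)  = liveGen(b) U (liveOut(b) - liveKill(b))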
+ BitMap[] blkLiveIn = new BitMap[cfg.Blocks.Count];
+ BitMap[] blkLiveOut = new BitMap[cfg.Blocks.Count];
+
+ for (int index = 0; index < cfg.Blocks.Count; index++)
+ {
+ blkLiveIn [index] = new BitMap(Allocators.Default, mapSize);
+ blkLiveOut[index] = new BitMap(Allocators.Default, mapSize);
+ }
+
+ bool modified;
+
+ do
+ {
+ modified = false;
+
+ for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ BitMap liveOut = blkLiveOut[block.Index];
+
+ for (int i = 0; i < block.SuccessorsCount; i++)
+ {
+ BasicBlock succ = block.GetSuccessor(i);
+
+ modified |= liveOut.Set(blkLiveIn[succ.Index]);
+ }
+
+ BitMap liveIn = blkLiveIn[block.Index];
+
+ liveIn.Set (liveOut);
+ liveIn.Clear(blkLiveKill[block.Index]);
+ liveIn.Set (blkLiveGen [block.Index]);
+ }
+ }
+ while (modified);
+
+ _blockLiveIn = blkLiveIn;
+
+ _blockEdges = new HashSet<int>();
+
+ // Compute lifetime intervals.
+ int operationPos = _operationsCount;
+
+ for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ // We handle empty blocks by pretending they have a dummy instruction,
+ // because otherwise the block would have the same start and end position,
+ // which is not valid.
+ int instCount = Math.Max(block.Operations.Count, 1);
+
+ int blockStart = operationPos - instCount * InstructionGap;
+ int blockEnd = operationPos;
+
+ _blockRanges[block.Index] = new LiveRange(blockStart, blockEnd);
+
+ _blockEdges.Add(blockStart);
+
+ BitMap liveOut = blkLiveOut[block.Index];
+
+ foreach (int id in liveOut)
+ {
+ _intervals[id].AddRange(blockStart, blockEnd);
+ }
+
+ if (block.Operations.Count == 0)
+ {
+ operationPos -= InstructionGap;
+
+ continue;
+ }
+
+ for (Operation node = block.Operations.Last; node != default; node = node.ListPrevious)
+ {
+ operationPos -= InstructionGap;
+
+ for (int i = 0; i < node.DestinationsCount; i++)
+ {
+ VisitDestination(node.GetDestination(i));
+ }
+
+ for (int i = 0; i < node.SourcesCount; i++)
+ {
+ VisitSource(node.GetSource(i));
+ }
+
+ if (node.Instruction == Instruction.Call)
+ {
+ AddIntervalCallerSavedReg(context.Masks.IntCallerSavedRegisters, operationPos, RegisterType.Integer);
+ AddIntervalCallerSavedReg(context.Masks.VecCallerSavedRegisters, operationPos, RegisterType.Vector);
+ }
+
+ void VisitSource(Operand source)
+ {
+ if (IsLocalOrRegister(source.Kind))
+ {
+ LiveInterval interval = _intervals[GetOperandId(source)];
+
+ interval.AddRange(blockStart, operationPos + 1);
+ interval.AddUsePosition(operationPos);
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress != default)
+ {
+ VisitSource(memOp.BaseAddress);
+ }
+
+ if (memOp.Index != default)
+ {
+ VisitSource(memOp.Index);
+ }
+ }
+ }
+
+ void VisitDestination(Operand dest)
+ {
+ LiveInterval interval = _intervals[GetOperandId(dest)];
+
+ if (interval.IsFixed)
+ {
+ interval.IsFixedAndUsed = true;
+ }
+
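+ // Destinations become live at operationPos + 1, one position after the
+ // instruction reads its sources, so an output does not interfere with the
+ // inputs of the same instruction.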
+ interval.SetStart(operationPos + 1);
+ interval.AddUsePosition(operationPos + 1);
+ }
+ }
+ }
+
+ foreach (LiveInterval interval in _parentIntervals)
+ {
+ interval.Reset();
+ }
+ }
+
+ private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType)
+ {
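+ // Block every caller-saved register for the duration of the call by adding a
+ // short range to its fixed interval covering the call position.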
+ while (mask != 0)
+ {
+ int regIndex = BitOperations.TrailingZeroCount(mask);
+
+ Register callerSavedReg = new Register(regIndex, regType);
+
+ LiveInterval interval = _intervals[GetRegisterId(callerSavedReg)];
+
+ interval.AddRange(operationPos + 1, operationPos + InstructionGap);
+
+ mask &= ~(1 << regIndex);
+ }
+ }
+
+ private static int GetOperandId(Operand operand)
+ {
+ if (operand.Kind == OperandKind.LocalVariable)
+ {
+ return operand.GetLocalNumber();
+ }
+ else if (operand.Kind == OperandKind.Register)
+ {
+ return GetRegisterId(operand.GetRegister());
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid operand kind \"{operand.Kind}\".");
+ }
+ }
+
+ private static int GetRegisterId(Register register)
+ {
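+ // Integer and vector registers share a single id space by interleaving:
+ // integer register N maps to id 2N, vector register N to id 2N + 1.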
+ return (register.Index << 1) | (register.Type == RegisterType.Vector ? 1 : 0);
+ }
+
+ private static bool IsLocalOrRegister(OperandKind kind)
+ {
+ return kind == OperandKind.LocalVariable ||
+ kind == OperandKind.Register;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
new file mode 100644
index 00000000..d739ad28
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
@@ -0,0 +1,396 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ unsafe readonly struct LiveInterval : IComparable<LiveInterval>
+ {
+ public const int NotFound = -1;
+
+ private struct Data
+ {
+ public int End;
+ public int SpillOffset;
+
+ public LiveRange FirstRange;
+ public LiveRange PrevRange;
+ public LiveRange CurrRange;
+
+ public LiveInterval Parent;
+
+ public UseList Uses;
+ public LiveIntervalList Children;
+
+ public Operand Local;
+ public Register Register;
+
+ public bool IsFixed;
+ public bool IsFixedAndUsed;
+ }
+
+ private readonly Data* _data;
+
+ private ref int End => ref _data->End;
+ private ref LiveRange FirstRange => ref _data->FirstRange;
+ private ref LiveRange CurrRange => ref _data->CurrRange;
+ private ref LiveRange PrevRange => ref _data->PrevRange;
+ private ref LiveInterval Parent => ref _data->Parent;
+ private ref UseList Uses => ref _data->Uses;
+ private ref LiveIntervalList Children => ref _data->Children;
+
+ public Operand Local => _data->Local;
+ public ref Register Register => ref _data->Register;
+ public ref int SpillOffset => ref _data->SpillOffset;
+
+ public bool IsFixed => _data->IsFixed;
+ public ref bool IsFixedAndUsed => ref _data->IsFixedAndUsed;
+ public bool IsEmpty => FirstRange == default;
+ public bool IsSplit => Children.Count != 0;
+ public bool IsSpilled => SpillOffset != -1;
+
+ public int UsesCount => Uses.Count;
+
+ public LiveInterval(Operand local = default, LiveInterval parent = default)
+ {
+ _data = Allocators.LiveIntervals.Allocate<Data>();
+ *_data = default;
+
+ _data->IsFixed = false;
+ _data->Local = local;
+
+ Parent = parent == default ? this : parent;
+ Uses = new UseList();
+ Children = new LiveIntervalList();
+
+ FirstRange = default;
+ CurrRange = default;
+ PrevRange = default;
+
+ SpillOffset = -1;
+ }
+
+ public LiveInterval(Register register) : this(local: default, parent: default)
+ {
+ _data->IsFixed = true;
+
+ Register = register;
+ }
+
+ public void Reset()
+ {
+ PrevRange = default;
+ CurrRange = FirstRange;
+ }
+
+ public void Forward(int position)
+ {
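+ // Advance the cached range cursor past ranges that lie entirely before the
+ // given position; queries such as Overlaps start scanning from this cursor.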
+ LiveRange prev = PrevRange;
+ LiveRange curr = CurrRange;
+
+ while (curr != default && curr.Start < position && !curr.Overlaps(position))
+ {
+ prev = curr;
+ curr = curr.Next;
+ }
+
+ PrevRange = prev;
+ CurrRange = curr;
+ }
+
+ public int GetStart()
+ {
+ Debug.Assert(!IsEmpty, "Empty LiveInterval cannot have a start position.");
+
+ return FirstRange.Start;
+ }
+
+ public void SetStart(int position)
+ {
+ if (FirstRange != default)
+ {
+ Debug.Assert(position != FirstRange.End);
+
+ FirstRange.Start = position;
+ }
+ else
+ {
+ FirstRange = new LiveRange(position, position + 1);
+ End = position + 1;
+ }
+ }
+
+ public int GetEnd()
+ {
+ Debug.Assert(!IsEmpty, "Empty LiveInterval cannot have an end position.");
+
+ return End;
+ }
+
+ public void AddRange(int start, int end)
+ {
+ Debug.Assert(start < end, $"Invalid range [{start}, {end}).");
+
+ if (FirstRange != default)
+ {
+ // If the new range ends exactly where the first range starts, then coalesce them together.
+ if (end == FirstRange.Start)
+ {
+ FirstRange.Start = start;
+
+ return;
+ }
+ // If the new range overlaps the first range, then merge them together.
+ else if (FirstRange.Overlaps(start, end))
+ {
+ FirstRange.Start = Math.Min(FirstRange.Start, start);
+ FirstRange.End = Math.Max(FirstRange.End, end);
+ End = Math.Max(End, end);
+
+ Debug.Assert(FirstRange.Next == default || !FirstRange.Overlaps(FirstRange.Next));
+ return;
+ }
+ }
+
+ FirstRange = new LiveRange(start, end, FirstRange);
+ End = Math.Max(End, end);
+
+ Debug.Assert(FirstRange.Next == default || !FirstRange.Overlaps(FirstRange.Next));
+ }
+
+ public void AddUsePosition(int position)
+ {
+ Uses.Add(position);
+ }
+
+ public bool Overlaps(int position)
+ {
+ LiveRange curr = CurrRange;
+
+ while (curr != default && curr.Start <= position)
+ {
+ if (curr.Overlaps(position))
+ {
+ return true;
+ }
+
+ curr = curr.Next;
+ }
+
+ return false;
+ }
+
+ public bool Overlaps(LiveInterval other)
+ {
+ return GetOverlapPosition(other) != NotFound;
+ }
+
+ public int GetOverlapPosition(LiveInterval other)
+ {
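+ // Walk both sorted range lists in lockstep starting from their cursors,
+ // advancing whichever list is behind, and report the position at which an
+ // overlap is first detected.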
+ LiveRange a = CurrRange;
+ LiveRange b = other.CurrRange;
+
+ while (a != default)
+ {
+ while (b != default && b.Start < a.Start)
+ {
+ if (a.Overlaps(b))
+ {
+ return a.Start;
+ }
+
+ b = b.Next;
+ }
+
+ if (b == default)
+ {
+ break;
+ }
+ else if (a.Overlaps(b))
+ {
+ return a.Start;
+ }
+
+ a = a.Next;
+ }
+
+ return NotFound;
+ }
+
+ public ReadOnlySpan<LiveInterval> SplitChildren()
+ {
+ return Parent.Children.Span;
+ }
+
+ public ReadOnlySpan<int> UsePositions()
+ {
+ return Uses.Span;
+ }
+
+ public int FirstUse()
+ {
+ return Uses.FirstUse;
+ }
+
+ public int NextUseAfter(int position)
+ {
+ return Uses.NextUse(position);
+ }
+
+ public LiveInterval Split(int position)
+ {
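+ // Split the range list at the given position: ranges entirely before it stay in
+ // this interval, while the remainder, along with the uses past the position,
+ // moves into a new child interval registered on the parent.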
+ LiveInterval result = new(Local, Parent);
+ result.End = End;
+
+ LiveRange prev = PrevRange;
+ LiveRange curr = CurrRange;
+
+ while (curr != default && curr.Start < position && !curr.Overlaps(position))
+ {
+ prev = curr;
+ curr = curr.Next;
+ }
+
+ if (curr.Start >= position)
+ {
+ prev.Next = default;
+
+ result.FirstRange = curr;
+
+ End = prev.End;
+ }
+ else
+ {
+ result.FirstRange = new LiveRange(position, curr.End, curr.Next);
+
+ curr.End = position;
+ curr.Next = default;
+
+ End = curr.End;
+ }
+
+ result.Uses = Uses.Split(position);
+
+ AddSplitChild(result);
+
+ Debug.Assert(!IsEmpty, "Left interval is empty after split.");
+ Debug.Assert(!result.IsEmpty, "Right interval is empty after split.");
+
+ // Make sure the iterator in the new split is pointing to the start.
+ result.Reset();
+
+ return result;
+ }
+
+ private void AddSplitChild(LiveInterval child)
+ {
+ Debug.Assert(!child.IsEmpty, "Trying to insert an empty interval.");
+
+ Parent.Children.Add(child);
+ }
+
+ public LiveInterval GetSplitChild(int position)
+ {
+ if (Overlaps(position))
+ {
+ return this;
+ }
+
+ foreach (LiveInterval splitChild in SplitChildren())
+ {
+ if (splitChild.Overlaps(position))
+ {
+ return splitChild;
+ }
+ else if (splitChild.GetStart() > position)
+ {
+ break;
+ }
+ }
+
+ return default;
+ }
+
+ public bool TrySpillWithSiblingOffset()
+ {
+ foreach (LiveInterval splitChild in SplitChildren())
+ {
+ if (splitChild.IsSpilled)
+ {
+ Spill(splitChild.SpillOffset);
+
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ public void Spill(int offset)
+ {
+ SpillOffset = offset;
+ }
+
+ public int CompareTo(LiveInterval interval)
+ {
+ if (FirstRange == default || interval.FirstRange == default)
+ {
+ return 0;
+ }
+
+ return GetStart().CompareTo(interval.GetStart());
+ }
+
+ public bool Equals(LiveInterval interval)
+ {
+ return interval._data == _data;
+ }
+
+ public override bool Equals(object obj)
+ {
+ return obj is LiveInterval interval && Equals(interval);
+ }
+
+ public static bool operator ==(LiveInterval a, LiveInterval b)
+ {
+ return a.Equals(b);
+ }
+
+ public static bool operator !=(LiveInterval a, LiveInterval b)
+ {
+ return !a.Equals(b);
+ }
+
+ public override int GetHashCode()
+ {
+ return HashCode.Combine((IntPtr)_data);
+ }
+
+ public override string ToString()
+ {
+ LiveInterval self = this;
+
+ IEnumerable<string> GetRanges()
+ {
+ LiveRange curr = self.CurrRange;
+
+ while (curr != default)
+ {
+ if (curr == self.CurrRange)
+ {
+ yield return "*" + curr;
+ }
+ else
+ {
+ yield return curr.ToString();
+ }
+
+ curr = curr.Next;
+ }
+ }
+
+ return string.Join(", ", GetRanges());
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs
new file mode 100644
index 00000000..06b979ea
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs
@@ -0,0 +1,40 @@
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ unsafe struct LiveIntervalList
+ {
+ private LiveInterval* _items;
+ private int _count;
+ private int _capacity;
+
+ public int Count => _count;
+ public Span<LiveInterval> Span => new(_items, _count);
+
+ public void Add(LiveInterval interval)
+ {
+ if (_count + 1 > _capacity)
+ {
+ var oldSpan = Span;
+
+ _capacity = Math.Max(4, _capacity * 2);
+ _items = Allocators.References.Allocate<LiveInterval>((uint)_capacity);
+
+ var newSpan = Span;
+
+ oldSpan.CopyTo(newSpan);
+ }
+
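+ // Keep the children sorted by ascending start position; a simple insertion
+ // sort from the back suffices since the lists are typically small.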
+ int position = interval.GetStart();
+ int i = _count - 1;
+
+ while (i >= 0 && _items[i].GetStart() > position)
+ {
+ _items[i + 1] = _items[i--];
+ }
+
+ _items[i + 1] = interval;
+ _count++;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
new file mode 100644
index 00000000..e38b5190
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
@@ -0,0 +1,74 @@
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ unsafe readonly struct LiveRange : IEquatable<LiveRange>
+ {
+ private struct Data
+ {
+ public int Start;
+ public int End;
+ public LiveRange Next;
+ }
+
+ private readonly Data* _data;
+
+ public ref int Start => ref _data->Start;
+ public ref int End => ref _data->End;
+ public ref LiveRange Next => ref _data->Next;
+
+ public LiveRange(int start, int end, LiveRange next = default)
+ {
+ _data = Allocators.LiveRanges.Allocate<Data>();
+
+ Start = start;
+ End = end;
+ Next = next;
+ }
+
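+ // Ranges are half-open intervals [Start, End): a position equal to End is not
+ // contained in the range.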
+ public bool Overlaps(int start, int end)
+ {
+ return Start < end && start < End;
+ }
+
+ public bool Overlaps(LiveRange range)
+ {
+ return Start < range.End && range.Start < End;
+ }
+
+ public bool Overlaps(int position)
+ {
+ return position >= Start && position < End;
+ }
+
+ public bool Equals(LiveRange range)
+ {
+ return range._data == _data;
+ }
+
+ public override bool Equals(object obj)
+ {
+ return obj is LiveRange range && Equals(range);
+ }
+
+ public static bool operator ==(LiveRange a, LiveRange b)
+ {
+ return a.Equals(b);
+ }
+
+ public static bool operator !=(LiveRange a, LiveRange b)
+ {
+ return !a.Equals(b);
+ }
+
+ public override int GetHashCode()
+ {
+ return HashCode.Combine((IntPtr)_data);
+ }
+
+ public override string ToString()
+ {
+ return $"[{Start}, {End})";
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
new file mode 100644
index 00000000..bc948f95
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
@@ -0,0 +1,50 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ readonly struct RegisterMasks
+ {
+ public int IntAvailableRegisters { get; }
+ public int VecAvailableRegisters { get; }
+ public int IntCallerSavedRegisters { get; }
+ public int VecCallerSavedRegisters { get; }
+ public int IntCalleeSavedRegisters { get; }
+ public int VecCalleeSavedRegisters { get; }
+ public int RegistersCount { get; }
+
+ public RegisterMasks(
+ int intAvailableRegisters,
+ int vecAvailableRegisters,
+ int intCallerSavedRegisters,
+ int vecCallerSavedRegisters,
+ int intCalleeSavedRegisters,
+ int vecCalleeSavedRegisters,
+ int registersCount)
+ {
+ IntAvailableRegisters = intAvailableRegisters;
+ VecAvailableRegisters = vecAvailableRegisters;
+ IntCallerSavedRegisters = intCallerSavedRegisters;
+ VecCallerSavedRegisters = vecCallerSavedRegisters;
+ IntCalleeSavedRegisters = intCalleeSavedRegisters;
+ VecCalleeSavedRegisters = vecCalleeSavedRegisters;
+ RegistersCount = registersCount;
+ }
+
+ public int GetAvailableRegisters(RegisterType type)
+ {
+ if (type == RegisterType.Integer)
+ {
+ return IntAvailableRegisters;
+ }
+ else if (type == RegisterType.Vector)
+ {
+ return VecAvailableRegisters;
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid register type \"{type}\".");
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
new file mode 100644
index 00000000..038312fe
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
@@ -0,0 +1,25 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class StackAllocator
+ {
+ private int _offset;
+
+ public int TotalSize => _offset;
+
+ public int Allocate(OperandType type)
+ {
+ return Allocate(type.GetSizeInBytes());
+ }
+
+ public int Allocate(int sizeInBytes)
+ {
+ int offset = _offset;
+
+ _offset += sizeInBytes;
+
+ return offset;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs
new file mode 100644
index 00000000..c89f0854
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs
@@ -0,0 +1,84 @@
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ unsafe struct UseList
+ {
+ private int* _items;
+ private int _capacity;
+ private int _count;
+
+ public int Count => _count;
+ public int FirstUse => _count > 0 ? _items[_count - 1] : LiveInterval.NotFound;
+ public Span<int> Span => new(_items, _count);
+
+ public void Add(int position)
+ {
+ if (_count + 1 > _capacity)
+ {
+ var oldSpan = Span;
+
+ _capacity = Math.Max(4, _capacity * 2);
+ _items = Allocators.Default.Allocate<int>((uint)_capacity);
+
+ var newSpan = Span;
+
+ oldSpan.CopyTo(newSpan);
+ }
+
+ // Use positions are usually added in descending order, so keeping the list sorted
+ // in descending order makes insertion faster, since fewer half exchanges are needed.
+ int i = _count - 1;
+
+ while (i >= 0 && _items[i] < position)
+ {
+ _items[i + 1] = _items[i--];
+ }
+
+ _items[i + 1] = position;
+ _count++;
+ }
+
+ public int NextUse(int position)
+ {
+ int index = NextUseIndex(position);
+
+ return index != LiveInterval.NotFound ? _items[index] : LiveInterval.NotFound;
+ }
+
+ public int NextUseIndex(int position)
+ {
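+ // The list is sorted in descending order, so scan from the back (the smallest
+ // positions) toward the front until an entry at or past the position is found.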
+ int i = _count - 1;
+
+ if (i == -1 || position > _items[0])
+ {
+ return LiveInterval.NotFound;
+ }
+
+ while (i >= 0 && _items[i] < position)
+ {
+ i--;
+ }
+
+ return i;
+ }
+
+ public UseList Split(int position)
+ {
+ int index = NextUseIndex(position);
+
+ // Since the list is sorted in descending order, the new split list takes the
+ // front of the buffer and the current list keeps the back.
+ UseList result = new();
+ result._count = index + 1;
+ result._capacity = result._count;
+ result._items = _items;
+
+ _count = _count - result._count;
+ _capacity = _count;
+ _items = _items + result._count;
+
+ return result;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
new file mode 100644
index 00000000..3d0bc21d
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ struct UnwindInfo
+ {
+ public const int Stride = 4; // Bytes.
+
+ public UnwindPushEntry[] PushEntries { get; }
+ public int PrologSize { get; }
+
+ public UnwindInfo(UnwindPushEntry[] pushEntries, int prologSize)
+ {
+ PushEntries = pushEntries;
+ PrologSize = prologSize;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs
new file mode 100644
index 00000000..4a8288a2
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ enum UnwindPseudoOp
+ {
+ PushReg = 0,
+ SetFrame = 1,
+ AllocStack = 2,
+ SaveReg = 3,
+ SaveXmm128 = 4
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
new file mode 100644
index 00000000..fd8ea402
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ struct UnwindPushEntry
+ {
+ public const int Stride = 16; // Bytes.
+
+ public UnwindPseudoOp PseudoOp { get; }
+ public int PrologOffset { get; }
+ public int RegIndex { get; }
+ public int StackOffsetOrAllocSize { get; }
+
+ public UnwindPushEntry(UnwindPseudoOp pseudoOp, int prologOffset, int regIndex = -1, int stackOffsetOrAllocSize = -1)
+ {
+ PseudoOp = pseudoOp;
+ PrologOffset = prologOffset;
+ RegIndex = regIndex;
+ StackOffsetOrAllocSize = stackOffsetOrAllocSize;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/Assembler.cs b/src/ARMeilleure/CodeGen/X86/Assembler.cs
new file mode 100644
index 00000000..67736a31
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -0,0 +1,1559 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.IntermediateRepresentation;
+using Ryujinx.Common.Memory;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ partial class Assembler
+ {
+ private const int ReservedBytesForJump = 1;
+
+ private const int OpModRMBits = 24;
+
+ private const byte RexPrefix = 0x40;
+ private const byte RexWPrefix = 0x48;
+ private const byte LockPrefix = 0xf0;
+
+ private const int MaxRegNumber = 15;
+
+ private struct Jump
+ {
+ public bool IsConditional { get; }
+ public X86Condition Condition { get; }
+ public Operand JumpLabel { get; }
+ public long? JumpTarget { get; set; }
+ public long JumpPosition { get; }
+ public long Offset { get; set; }
+ public int InstSize { get; set; }
+
+ public Jump(Operand jumpLabel, long jumpPosition)
+ {
+ IsConditional = false;
+ Condition = 0;
+ JumpLabel = jumpLabel;
+ JumpTarget = null;
+ JumpPosition = jumpPosition;
+
+ Offset = 0;
+ InstSize = 0;
+ }
+
+ public Jump(X86Condition condition, Operand jumpLabel, long jumpPosition)
+ {
+ IsConditional = true;
+ Condition = condition;
+ JumpLabel = jumpLabel;
+ JumpTarget = null;
+ JumpPosition = jumpPosition;
+
+ Offset = 0;
+ InstSize = 0;
+ }
+ }
+
+ private struct Reloc
+ {
+ public int JumpIndex { get; set; }
+ public int Position { get; set; }
+ public Symbol Symbol { get; set; }
+ }
+
+ private readonly List<Jump> _jumps;
+ private readonly List<Reloc> _relocs;
+ private readonly Dictionary<Operand, long> _labels;
+ private readonly Stream _stream;
+
+ public bool HasRelocs => _relocs != null;
+
+ public Assembler(Stream stream, bool relocatable)
+ {
+ _stream = stream;
+ _labels = new Dictionary<Operand, long>();
+ _jumps = new List<Jump>();
+
+ _relocs = relocatable ? new List<Reloc>() : null;
+ }
+
+ public void MarkLabel(Operand label)
+ {
+ _labels.Add(label, _stream.Position);
+ }
+
+ public void Add(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Add);
+ }
+
+ public void Addsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Addsd);
+ }
+
+ public void Addss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Addss);
+ }
+
+ public void And(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.And);
+ }
+
+ public void Bsr(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Bsr);
+ }
+
+ public void Bswap(Operand dest)
+ {
+ WriteInstruction(dest, default, dest.Type, X86Instruction.Bswap);
+ }
+
+ public void Call(Operand dest)
+ {
+ WriteInstruction(dest, default, OperandType.None, X86Instruction.Call);
+ }
+
+ public void Cdq()
+ {
+ WriteByte(0x99);
+ }
+
+ public void Cmovcc(Operand dest, Operand source, OperandType type, X86Condition condition)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Cmovcc];
+
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM | (int)condition, rrm: true);
+ }
+
+ public void Cmp(Operand src1, Operand src2, OperandType type)
+ {
+ WriteInstruction(src1, src2, type, X86Instruction.Cmp);
+ }
+
+ public void Cqo()
+ {
+ WriteByte(0x48);
+ WriteByte(0x99);
+ }
+
+ public void Cmpxchg(Operand memOp, Operand src)
+ {
+ Debug.Assert(memOp.Kind == OperandKind.Memory);
+
+ WriteByte(LockPrefix);
+
+ WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg);
+ }
+
+ public void Cmpxchg16(Operand memOp, Operand src)
+ {
+ Debug.Assert(memOp.Kind == OperandKind.Memory);
+
+ WriteByte(LockPrefix);
+ WriteByte(0x66);
+
+ WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg);
+ }
+
+ public void Cmpxchg16b(Operand memOp)
+ {
+ Debug.Assert(memOp.Kind == OperandKind.Memory);
+
+ WriteByte(LockPrefix);
+
+ WriteInstruction(memOp, default, OperandType.None, X86Instruction.Cmpxchg16b);
+ }
+
+ public void Cmpxchg8(Operand memOp, Operand src)
+ {
+ Debug.Assert(memOp.Kind == OperandKind.Memory);
+
+ WriteByte(LockPrefix);
+
+ WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg8);
+ }
+
+ public void Comisd(Operand src1, Operand src2)
+ {
+ WriteInstruction(src1, default, src2, X86Instruction.Comisd);
+ }
+
+ public void Comiss(Operand src1, Operand src2)
+ {
+ WriteInstruction(src1, default, src2, X86Instruction.Comiss);
+ }
+
+ public void Cvtsd2ss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss);
+ }
+
+ public void Cvtsi2sd(Operand dest, Operand src1, Operand src2, OperandType type)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2sd, type);
+ }
+
+ public void Cvtsi2ss(Operand dest, Operand src1, Operand src2, OperandType type)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2ss, type);
+ }
+
+ public void Cvtss2sd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Cvtss2sd);
+ }
+
+ public void Div(Operand source)
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Div);
+ }
+
+ public void Divsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Divsd);
+ }
+
+ public void Divss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Divss);
+ }
+
+ public void Idiv(Operand source)
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Idiv);
+ }
+
+ public void Imul(Operand source)
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Imul128);
+ }
+
+ public void Imul(Operand dest, Operand source, OperandType type)
+ {
+ if (source.Kind != OperandKind.Register)
+ {
+ throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
+ }
+
+ WriteInstruction(dest, source, type, X86Instruction.Imul);
+ }
+
+ public void Imul(Operand dest, Operand src1, Operand src2, OperandType type)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Imul];
+
+ if (src2.Kind != OperandKind.Constant)
+ {
+ throw new ArgumentException($"Invalid source 2 operand kind \"{src2.Kind}\".");
+ }
+
+ if (IsImm8(src2.Value, src2.Type) && info.OpRMImm8 != BadOp)
+ {
+ WriteOpCode(dest, default, src1, type, info.Flags, info.OpRMImm8, rrm: true);
+
+ WriteByte(src2.AsByte());
+ }
+ else if (IsImm32(src2.Value, src2.Type) && info.OpRMImm32 != BadOp)
+ {
+ WriteOpCode(dest, default, src1, type, info.Flags, info.OpRMImm32, rrm: true);
+
+ WriteInt32(src2.AsInt32());
+ }
+ else
+ {
+ throw new ArgumentException($"Failed to encode constant 0x{src2.Value:X}.");
+ }
+ }
+
+ public void Insertps(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Insertps);
+
+ WriteByte(imm);
+ }
+
+ public void Jcc(X86Condition condition, Operand dest)
+ {
+ if (dest.Kind == OperandKind.Label)
+ {
+ _jumps.Add(new Jump(condition, dest, _stream.Position));
+
+ // ReservedBytesForJump
+ WriteByte(0);
+ }
+ else
+ {
+ throw new ArgumentException("Destination operand must be of kind Label", nameof(dest));
+ }
+ }
+
+ public void Jcc(X86Condition condition, long offset)
+ {
+ if (ConstFitsOnS8(offset))
+ {
+ WriteByte((byte)(0x70 | (int)condition));
+
+ WriteByte((byte)offset);
+ }
+ else if (ConstFitsOnS32(offset))
+ {
+ WriteByte(0x0f);
+ WriteByte((byte)(0x80 | (int)condition));
+
+ WriteInt32((int)offset);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+ }
+
+ public void Jmp(long offset)
+ {
+ if (ConstFitsOnS8(offset))
+ {
+ WriteByte(0xeb);
+
+ WriteByte((byte)offset);
+ }
+ else if (ConstFitsOnS32(offset))
+ {
+ WriteByte(0xe9);
+
+ WriteInt32((int)offset);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+ }
+
+ public void Jmp(Operand dest)
+ {
+ if (dest.Kind == OperandKind.Label)
+ {
+ _jumps.Add(new Jump(dest, _stream.Position));
+
+ // ReservedBytesForJump
+ WriteByte(0);
+ }
+ else
+ {
+ WriteInstruction(dest, default, OperandType.None, X86Instruction.Jmp);
+ }
+ }
+
+ public void Ldmxcsr(Operand dest)
+ {
+ WriteInstruction(dest, default, OperandType.I32, X86Instruction.Ldmxcsr);
+ }
+
+ public void Lea(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Lea);
+ }
+
+ public void LockOr(Operand dest, Operand source, OperandType type)
+ {
+ WriteByte(LockPrefix);
+ WriteInstruction(dest, source, type, X86Instruction.Or);
+ }
+
+ public void Mov(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Mov);
+ }
+
+ public void Mov16(Operand dest, Operand source)
+ {
+ WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov16);
+ }
+
+ public void Mov8(Operand dest, Operand source)
+ {
+ WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov8);
+ }
+
+ public void Movd(Operand dest, Operand source)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Movd];
+
+ if (source.Type.IsInteger() || source.Kind == OperandKind.Memory)
+ {
+ WriteOpCode(dest, default, source, OperandType.None, info.Flags, info.OpRRM, rrm: true);
+ }
+ else
+ {
+ WriteOpCode(dest, default, source, OperandType.None, info.Flags, info.OpRMR);
+ }
+ }
+
+ public void Movdqu(Operand dest, Operand source)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Movdqu);
+ }
+
+ public void Movhlps(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Movhlps);
+ }
+
+ public void Movlhps(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Movlhps);
+ }
+
+ public void Movq(Operand dest, Operand source)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Movd];
+
+ InstructionFlags flags = info.Flags | InstructionFlags.RexW;
+
+ if (source.Type.IsInteger() || source.Kind == OperandKind.Memory)
+ {
+ WriteOpCode(dest, default, source, OperandType.None, flags, info.OpRRM, rrm: true);
+ }
+ else if (dest.Type.IsInteger() || dest.Kind == OperandKind.Memory)
+ {
+ WriteOpCode(dest, default, source, OperandType.None, flags, info.OpRMR);
+ }
+ else
+ {
+ WriteInstruction(dest, source, OperandType.None, X86Instruction.Movq);
+ }
+ }
+
+ public void Movsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Movsd);
+ }
+
+ public void Movss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Movss);
+ }
+
+ public void Movsx16(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movsx16);
+ }
+
+ public void Movsx32(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movsx32);
+ }
+
+ public void Movsx8(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movsx8);
+ }
+
+ public void Movzx16(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movzx16);
+ }
+
+ public void Movzx8(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movzx8);
+ }
+
+ public void Mul(Operand source)
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Mul128);
+ }
+
+ public void Mulsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Mulsd);
+ }
+
+ public void Mulss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Mulss);
+ }
+
+ public void Neg(Operand dest)
+ {
+ WriteInstruction(dest, default, dest.Type, X86Instruction.Neg);
+ }
+
+ public void Not(Operand dest)
+ {
+ WriteInstruction(dest, default, dest.Type, X86Instruction.Not);
+ }
+
+ public void Or(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Or);
+ }
+
+ public void Pclmulqdq(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pclmulqdq);
+
+ WriteByte(imm);
+ }
+
+ public void Pcmpeqw(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pcmpeqw);
+ }
+
+ public void Pextrb(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pextrb);
+
+ WriteByte(imm);
+ }
+
+ public void Pextrd(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pextrd);
+
+ WriteByte(imm);
+ }
+
+ public void Pextrq(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pextrq);
+
+ WriteByte(imm);
+ }
+
+ public void Pextrw(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pextrw);
+
+ WriteByte(imm);
+ }
+
+ public void Pinsrb(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pinsrb);
+
+ WriteByte(imm);
+ }
+
+ public void Pinsrd(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pinsrd);
+
+ WriteByte(imm);
+ }
+
+ public void Pinsrq(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pinsrq);
+
+ WriteByte(imm);
+ }
+
+ public void Pinsrw(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pinsrw);
+
+ WriteByte(imm);
+ }
+
+ public void Pop(Operand dest)
+ {
+ if (dest.Kind == OperandKind.Register)
+ {
+ WriteCompactInst(dest, 0x58);
+ }
+ else
+ {
+ WriteInstruction(dest, default, dest.Type, X86Instruction.Pop);
+ }
+ }
+
+ public void Popcnt(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Popcnt);
+ }
+
+ public void Pshufd(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pshufd);
+
+ WriteByte(imm);
+ }
+
+ public void Push(Operand source)
+ {
+ if (source.Kind == OperandKind.Register)
+ {
+ WriteCompactInst(source, 0x50);
+ }
+ else
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Push);
+ }
+ }
+
+ public void Return()
+ {
+ WriteByte(0xc3);
+ }
+
+ public void Ror(Operand dest, Operand source, OperandType type)
+ {
+ WriteShiftInst(dest, source, type, X86Instruction.Ror);
+ }
+
+ public void Sar(Operand dest, Operand source, OperandType type)
+ {
+ WriteShiftInst(dest, source, type, X86Instruction.Sar);
+ }
+
+ public void Shl(Operand dest, Operand source, OperandType type)
+ {
+ WriteShiftInst(dest, source, type, X86Instruction.Shl);
+ }
+
+ public void Shr(Operand dest, Operand source, OperandType type)
+ {
+ WriteShiftInst(dest, source, type, X86Instruction.Shr);
+ }
+
+ public void Setcc(Operand dest, X86Condition condition)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Setcc];
+
+ WriteOpCode(dest, default, default, OperandType.None, info.Flags, info.OpRRM | (int)condition);
+ }
+
+ public void Stmxcsr(Operand dest)
+ {
+ WriteInstruction(dest, default, OperandType.I32, X86Instruction.Stmxcsr);
+ }
+
+ public void Sub(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Sub);
+ }
+
+ public void Subsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Subsd);
+ }
+
+ public void Subss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Subss);
+ }
+
+ public void Test(Operand src1, Operand src2, OperandType type)
+ {
+ WriteInstruction(src1, src2, type, X86Instruction.Test);
+ }
+
+ public void Xor(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Xor);
+ }
+
+ public void Xorps(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Xorps);
+ }
+
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand source,
+ OperandType type = OperandType.None)
+ {
+ WriteInstruction(dest, default, source, inst, type);
+ }
+
+ public void WriteInstruction(X86Instruction inst, Operand dest, Operand src1, Operand src2)
+ {
+ if (src2.Kind == OperandKind.Constant)
+ {
+ WriteInstruction(src1, dest, src2, inst);
+ }
+ else
+ {
+ WriteInstruction(dest, src1, src2, inst);
+ }
+ }
+
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ OperandType type)
+ {
+ WriteInstruction(dest, src1, src2, inst, type);
+ }
+
+ public void WriteInstruction(X86Instruction inst, Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, inst);
+
+ WriteByte(imm);
+ }
+
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ Operand src3)
+ {
+ // 3+ operands can only be encoded with the VEX encoding scheme.
+ Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
+
+ WriteInstruction(dest, src1, src2, inst);
+
+ WriteByte((byte)(src3.AsByte() << 4));
+ }
+
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ byte imm)
+ {
+ WriteInstruction(dest, src1, src2, inst);
+
+ WriteByte(imm);
+ }
+
+ private void WriteShiftInst(Operand dest, Operand source, OperandType type, X86Instruction inst)
+ {
+ if (source.Kind == OperandKind.Register)
+ {
+ X86Register shiftReg = (X86Register)source.GetRegister().Index;
+
+ Debug.Assert(shiftReg == X86Register.Rcx, $"Invalid shift register \"{shiftReg}\".");
+
+ source = default;
+ }
+ else if (source.Kind == OperandKind.Constant)
+ {
+ source = Operand.Factory.Const((int)source.Value & (dest.Type == OperandType.I32 ? 0x1f : 0x3f));
+ }
+
+ WriteInstruction(dest, source, type, inst);
+ }
+
+ private void WriteInstruction(Operand dest, Operand source, OperandType type, X86Instruction inst)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)inst];
+
+ if (source != default)
+ {
+ if (source.Kind == OperandKind.Constant)
+ {
+ ulong imm = source.Value;
+
+ if (inst == X86Instruction.Mov8)
+ {
+ WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm8);
+
+ WriteByte((byte)imm);
+ }
+ else if (inst == X86Instruction.Mov16)
+ {
+ WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm32);
+
+ WriteInt16((short)imm);
+ }
+ else if (IsImm8(imm, type) && info.OpRMImm8 != BadOp)
+ {
+ WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm8);
+
+ WriteByte((byte)imm);
+ }
+ else if (!source.Relocatable && IsImm32(imm, type) && info.OpRMImm32 != BadOp)
+ {
+ WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm32);
+
+ WriteInt32((int)imm);
+ }
+ else if (dest != default && dest.Kind == OperandKind.Register && info.OpRImm64 != BadOp)
+ {
+ int rexPrefix = GetRexPrefix(dest, source, type, rrm: false);
+
+ if (rexPrefix != 0)
+ {
+ WriteByte((byte)rexPrefix);
+ }
+
+ WriteByte((byte)(info.OpRImm64 + (dest.GetRegister().Index & 0b111)));
+
+ if (HasRelocs && source.Relocatable)
+ {
+ _relocs.Add(new Reloc
+ {
+ JumpIndex = _jumps.Count - 1,
+ Position = (int)_stream.Position,
+ Symbol = source.Symbol
+ });
+ }
+
+ WriteUInt64(imm);
+ }
+ else
+ {
+ throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
+ }
+ }
+ else if (source.Kind == OperandKind.Register && info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRMR);
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
+ }
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else if (info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRMR);
+ }
+ else
+ {
+ throw new ArgumentNullException(nameof(source));
+ }
+ }
+
+ private void WriteInstruction(
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ X86Instruction inst,
+ OperandType type = OperandType.None)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)inst];
+
+ if (src2 != default)
+ {
+ if (src2.Kind == OperandKind.Constant)
+ {
+ ulong imm = src2.Value;
+
+ if ((byte)imm == imm && info.OpRMImm8 != BadOp)
+ {
+ WriteOpCode(dest, src1, default, type, info.Flags, info.OpRMImm8);
+
+ WriteByte((byte)imm);
+ }
+ else
+ {
+ throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
+ }
+ }
+ else if (src2.Kind == OperandKind.Register && info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid source operand kind \"{src2.Kind}\".");
+ }
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else if (info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
+ }
+ else
+ {
+ throw new ArgumentNullException(nameof(src2));
+ }
+ }
+
+ private void WriteOpCode(
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ OperandType type,
+ InstructionFlags flags,
+ int opCode,
+ bool rrm = false)
+ {
+ int rexPrefix = GetRexPrefix(dest, src2, type, rrm);
+
+ if ((flags & InstructionFlags.RexW) != 0)
+ {
+ rexPrefix |= RexWPrefix;
+ }
+
+ int modRM = (opCode >> OpModRMBits) << 3;
+
+ MemoryOperand memOp = default;
+ bool hasMemOp = false;
+
+ if (dest != default)
+ {
+ if (dest.Kind == OperandKind.Register)
+ {
+ int regIndex = dest.GetRegister().Index;
+
+ modRM |= (regIndex & 0b111) << (rrm ? 3 : 0);
+
+ if ((flags & InstructionFlags.Reg8Dest) != 0 && regIndex >= 4)
+ {
+ rexPrefix |= RexPrefix;
+ }
+ }
+ else if (dest.Kind == OperandKind.Memory)
+ {
+ memOp = dest.GetMemory();
+ hasMemOp = true;
+ }
+ else
+ {
+ throw new ArgumentException("Invalid destination operand kind \"" + dest.Kind + "\".");
+ }
+ }
+
+ if (src2 != default)
+ {
+ if (src2.Kind == OperandKind.Register)
+ {
+ int regIndex = src2.GetRegister().Index;
+
+ modRM |= (regIndex & 0b111) << (rrm ? 0 : 3);
+
+ if ((flags & InstructionFlags.Reg8Src) != 0 && regIndex >= 4)
+ {
+ rexPrefix |= RexPrefix;
+ }
+ }
+ else if (src2.Kind == OperandKind.Memory && !hasMemOp)
+ {
+ memOp = src2.GetMemory();
+ hasMemOp = true;
+ }
+ else
+ {
+ throw new ArgumentException("Invalid source operand kind \"" + src2.Kind + "\".");
+ }
+ }
+
+ bool needsSibByte = false;
+ bool needsDisplacement = false;
+
+ int sib = 0;
+
+ if (hasMemOp)
+ {
+ // Either source or destination is a memory operand.
+ Register baseReg = memOp.BaseAddress.GetRegister();
+
+ X86Register baseRegLow = (X86Register)(baseReg.Index & 0b111);
+
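+ // An RSP-encoded base (low bits 0b100) always requires a SIB byte, and an
+ // RBP-encoded base with mod = 00 would select disp32-only addressing, so a
+ // displacement is emitted for it even when the displacement is zero.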
+ needsSibByte = memOp.Index != default || baseRegLow == X86Register.Rsp;
+ needsDisplacement = memOp.Displacement != 0 || baseRegLow == X86Register.Rbp;
+
+ if (needsDisplacement)
+ {
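+ // mod = 01 (0x40) selects an 8-bit displacement, mod = 10 (0x80) a 32-bit one.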
+ if (ConstFitsOnS8(memOp.Displacement))
+ {
+ modRM |= 0x40;
+ }
+ else /* if (ConstFitsOnS32(memOp.Displacement)) */
+ {
+ modRM |= 0x80;
+ }
+ }
+
+ if (baseReg.Index >= 8)
+ {
+ Debug.Assert((uint)baseReg.Index <= MaxRegNumber);
+
+ rexPrefix |= RexPrefix | (baseReg.Index >> 3);
+ }
+
+ if (needsSibByte)
+ {
+ sib = (int)baseRegLow;
+
+ if (memOp.Index != default)
+ {
+ int indexReg = memOp.Index.GetRegister().Index;
+
+ Debug.Assert(indexReg != (int)X86Register.Rsp, "Using RSP as index register on the memory operand is not allowed.");
+
+ if (indexReg >= 8)
+ {
+ Debug.Assert((uint)indexReg <= MaxRegNumber);
+
+ rexPrefix |= RexPrefix | (indexReg >> 3) << 1;
+ }
+
+ sib |= (indexReg & 0b111) << 3;
+ }
+ else
+ {
+ sib |= 0b100 << 3;
+ }
+
+ sib |= (int)memOp.Scale << 6;
+
+ modRM |= 0b100;
+ }
+ else
+ {
+ modRM |= (int)baseRegLow;
+ }
+ }
+ else
+ {
+ // Source and destination are registers.
+ modRM |= 0xc0;
+ }
+
+ Debug.Assert(opCode != BadOp, "Invalid opcode value.");
+
+ if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding)
+ {
+ WriteEvexInst(dest, src1, src2, type, flags, opCode);
+
+ opCode &= 0xff;
+ }
+ else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
+ {
+ // In a VEX encoding, only one legacy prefix can be active at a time. The active prefix is encoded in the second byte using two bits.
+
+ int vexByte2 = (flags & InstructionFlags.PrefixMask) switch
+ {
+ InstructionFlags.Prefix66 => 1,
+ InstructionFlags.PrefixF3 => 2,
+ InstructionFlags.PrefixF2 => 3,
+ _ => 0
+ };
+
+ if (src1 != default)
+ {
+ vexByte2 |= (src1.GetRegister().Index ^ 0xf) << 3;
+ }
+ else
+ {
+ vexByte2 |= 0b1111 << 3;
+ }
+
+ ushort opCodeHigh = (ushort)(opCode >> 8);
+
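+ // The two-byte VEX form (0xc5) can only express the 0F opcode map and cannot
+ // encode the W, X and B bits, so it is only used when none of those are needed.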
+ if ((rexPrefix & 0b1011) == 0 && opCodeHigh == 0xf)
+ {
+ // Two-byte form.
+ WriteByte(0xc5);
+
+ vexByte2 |= (~rexPrefix & 4) << 5;
+
+ WriteByte((byte)vexByte2);
+ }
+ else
+ {
+ // Three-byte form.
+ WriteByte(0xc4);
+
+ int vexByte1 = (~rexPrefix & 7) << 5;
+
+ switch (opCodeHigh)
+ {
+ case 0xf: vexByte1 |= 1; break;
+ case 0xf38: vexByte1 |= 2; break;
+ case 0xf3a: vexByte1 |= 3; break;
+
+ default: Debug.Assert(false, $"Failed to VEX encode opcode 0x{opCode:X}."); break;
+ }
+
+ vexByte2 |= (rexPrefix & 8) << 4;
+
+ WriteByte((byte)vexByte1);
+ WriteByte((byte)vexByte2);
+ }
+
+ opCode &= 0xff;
+ }
+ else
+ {
+ if (flags.HasFlag(InstructionFlags.Prefix66))
+ {
+ WriteByte(0x66);
+ }
+
+ if (flags.HasFlag(InstructionFlags.PrefixF2))
+ {
+ WriteByte(0xf2);
+ }
+
+ if (flags.HasFlag(InstructionFlags.PrefixF3))
+ {
+ WriteByte(0xf3);
+ }
+
+ if (rexPrefix != 0)
+ {
+ WriteByte((byte)rexPrefix);
+ }
+ }
+
+ if (dest != default && (flags & InstructionFlags.RegOnly) != 0)
+ {
+ opCode += dest.GetRegister().Index & 7;
+ }
+
+ if ((opCode & 0xff0000) != 0)
+ {
+ WriteByte((byte)(opCode >> 16));
+ }
+
+ if ((opCode & 0xff00) != 0)
+ {
+ WriteByte((byte)(opCode >> 8));
+ }
+
+ WriteByte((byte)opCode);
+
+ if ((flags & InstructionFlags.RegOnly) == 0)
+ {
+ WriteByte((byte)modRM);
+
+ if (needsSibByte)
+ {
+ WriteByte((byte)sib);
+ }
+
+ if (needsDisplacement)
+ {
+ if (ConstFitsOnS8(memOp.Displacement))
+ {
+ WriteByte((byte)memOp.Displacement);
+ }
+ else /* if (ConstFitsOnS32(memOp.Displacement)) */
+ {
+ WriteInt32(memOp.Displacement);
+ }
+ }
+ }
+ }
+
+ private void WriteEvexInst(
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ OperandType type,
+ InstructionFlags flags,
+ int opCode,
+ bool broadcast = false,
+ int registerWidth = 128,
+ int maskRegisterIdx = 0,
+ bool zeroElements = false)
+ {
+ int op1Idx = dest.GetRegister().Index;
+ int op2Idx = src1.GetRegister().Index;
+ int op3Idx = src2.GetRegister().Index;
+
+ WriteByte(0x62);
+
+ // P0
+ // Extend operand 1 register (bit 3)
+ bool r = (op1Idx & 8) == 0;
+ // Extend operand 3 register (bit 4)
+ bool x = (op3Idx & 16) == 0;
+ // Extend operand 3 register (bit 3)
+ bool b = (op3Idx & 8) == 0;
+ // Extend operand 1 register (bit 4)
+ bool rp = (op1Idx & 16) == 0;
+ // Escape code index
+ byte mm = 0b00;
+
+ switch ((ushort)(opCode >> 8))
+ {
+ case 0xf00: mm = 0b01; break;
+ case 0xf38: mm = 0b10; break;
+ case 0xf3a: mm = 0b11; break;
+
+ default: Debug.Fail($"Failed to EVEX encode opcode 0x{opCode:X}."); break;
+ }
+
+ WriteByte(
+ (byte)(
+ (r ? 0x80 : 0) |
+ (x ? 0x40 : 0) |
+ (b ? 0x20 : 0) |
+ (rp ? 0x10 : 0) |
+ mm));
+
+ // P1
+ // Specify 64-bit lane mode
+ bool w = Is64Bits(type);
+ // Operand 2 register index
+ byte vvvv = (byte)(~op2Idx & 0b1111);
+ // Opcode prefix
+ byte pp = (flags & InstructionFlags.PrefixMask) switch
+ {
+ InstructionFlags.Prefix66 => 0b01,
+ InstructionFlags.PrefixF3 => 0b10,
+ InstructionFlags.PrefixF2 => 0b11,
+ _ => 0
+ };
+ WriteByte(
+ (byte)(
+ (w ? 0x80 : 0) |
+ (vvvv << 3) |
+ 0b100 |
+ pp));
+
+ // P2
+ // Mask register determines what elements to zero, rather than what elements to merge
+ bool z = zeroElements;
+ // Specifies register-width
+ byte ll = 0b00;
+ switch (registerWidth)
+ {
+ case 128: ll = 0b00; break;
+ case 256: ll = 0b01; break;
+ case 512: ll = 0b10; break;
+
+ default: Debug.Fail($"Invalid EVEX vector register width {registerWidth}."); break;
+ }
+ // Embedded broadcast in the case of a memory operand
+ bool bcast = broadcast;
+ // Extend operand 2 register
+ bool vp = (op2Idx & 16) == 0;
+ // Mask register index
+ Debug.Assert(maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}.");
+ byte aaa = (byte)(maskRegisterIdx & 0b111);
+
+ WriteByte(
+ (byte)(
+ (z ? 0x80 : 0) |
+ (ll << 5) |
+ (bcast ? 0x10 : 0) |
+ (vp ? 8 : 0) |
+ aaa));
+ }
+
+ private void WriteCompactInst(Operand operand, int opCode)
+ {
+ int regIndex = operand.GetRegister().Index;
+
+ if (regIndex >= 8)
+ {
+ WriteByte(0x41);
+ }
+
+ WriteByte((byte)(opCode + (regIndex & 0b111)));
+ }
+
+ private static int GetRexPrefix(Operand dest, Operand source, OperandType type, bool rrm)
+ {
+ int rexPrefix = 0;
+
+ if (Is64Bits(type))
+ {
+ rexPrefix = RexWPrefix;
+ }
+
+ void SetRegisterHighBit(Register reg, int bit)
+ {
+ if (reg.Index >= 8)
+ {
+ rexPrefix |= RexPrefix | (reg.Index >> 3) << bit;
+ }
+ }
+
+ if (dest != default && dest.Kind == OperandKind.Register)
+ {
+ SetRegisterHighBit(dest.GetRegister(), rrm ? 2 : 0);
+ }
+
+ if (source != default && source.Kind == OperandKind.Register)
+ {
+ SetRegisterHighBit(source.GetRegister(), rrm ? 0 : 2);
+ }
+
+ return rexPrefix;
+ }
+
+ public (byte[], RelocInfo) GetCode()
+ {
+ var jumps = CollectionsMarshal.AsSpan(_jumps);
+ var relocs = CollectionsMarshal.AsSpan(_relocs);
+
+ // Write jump relative offsets.
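+ // Jump sizes depend on their offsets, and offsets depend on the sizes of the
+ // jumps in between, so iterate until no jump changes size or offset.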
+ bool modified;
+
+ do
+ {
+ modified = false;
+
+ for (int i = 0; i < jumps.Length; i++)
+ {
+ ref Jump jump = ref jumps[i];
+
+ // If the jump target has not been resolved yet, resolve it now.
+ if (jump.JumpTarget == null)
+ {
+ jump.JumpTarget = _labels[jump.JumpLabel];
+ }
+
+ long jumpTarget = jump.JumpTarget.Value;
+ long offset = jumpTarget - jump.JumpPosition;
+
+ if (offset < 0)
+ {
+ for (int j = i - 1; j >= 0; j--)
+ {
+ ref Jump jump2 = ref jumps[j];
+
+ if (jump2.JumpPosition < jumpTarget)
+ {
+ break;
+ }
+
+ offset -= jump2.InstSize - ReservedBytesForJump;
+ }
+ }
+ else
+ {
+ for (int j = i + 1; j < jumps.Length; j++)
+ {
+ ref Jump jump2 = ref jumps[j];
+
+ if (jump2.JumpPosition >= jumpTarget)
+ {
+ break;
+ }
+
+ offset += jump2.InstSize - ReservedBytesForJump;
+ }
+
+ offset -= ReservedBytesForJump;
+ }
+
+ if (jump.IsConditional)
+ {
+ jump.InstSize = GetJccLength(offset);
+ }
+ else
+ {
+ jump.InstSize = GetJmpLength(offset);
+ }
+
+ // The jump is relative to the next instruction, not the current one.
+ // Since we didn't know the next instruction address when calculating
+ // the offset (as the size of the current jump instruction was not known),
+ // we now need to compensate the offset with the jump instruction size.
+ // It's also worth noting that:
+ // - This is only needed for backward jumps.
+ // - GetJmpLength and GetJccLength also compensate the offset
+ //   internally when computing the jump instruction size.
+ if (offset < 0)
+ {
+ offset -= jump.InstSize;
+ }
+
+ if (jump.Offset != offset)
+ {
+ jump.Offset = offset;
+
+ modified = true;
+ }
+ }
+ }
+ while (modified);
+
+ // Write the code, ignoring the dummy bytes after jumps, into a new stream.
+ _stream.Seek(0, SeekOrigin.Begin);
+
+ using var codeStream = MemoryStreamManager.Shared.GetStream();
+ var assembler = new Assembler(codeStream, HasRelocs);
+
+ bool hasRelocs = HasRelocs;
+ int relocIndex = 0;
+ int relocOffset = 0;
+ var relocEntries = hasRelocs
+ ? new RelocEntry[relocs.Length]
+ : Array.Empty<RelocEntry>();
+
+ for (int i = 0; i < jumps.Length; i++)
+ {
+ ref Jump jump = ref jumps[i];
+
+ // If there are relocations, calculate their new positions, compensating for jumps.
+ if (hasRelocs)
+ {
+ relocOffset += jump.InstSize - ReservedBytesForJump;
+
+ for (; relocIndex < relocEntries.Length; relocIndex++)
+ {
+ ref Reloc reloc = ref relocs[relocIndex];
+
+ if (reloc.JumpIndex > i)
+ {
+ break;
+ }
+
+ relocEntries[relocIndex] = new RelocEntry(reloc.Position + relocOffset, reloc.Symbol);
+ }
+ }
+
+ Span<byte> buffer = new byte[jump.JumpPosition - _stream.Position];
+
+ _stream.Read(buffer);
+ _stream.Seek(ReservedBytesForJump, SeekOrigin.Current);
+
+ codeStream.Write(buffer);
+
+ if (jump.IsConditional)
+ {
+ assembler.Jcc(jump.Condition, jump.Offset);
+ }
+ else
+ {
+ assembler.Jmp(jump.Offset);
+ }
+ }
+
+ // Write any remaining relocations, i.e. those placed after the last jump
+ // (or all of them, when no jumps were assembled).
+ for (; relocIndex < relocEntries.Length; relocIndex++)
+ {
+ ref Reloc reloc = ref relocs[relocIndex];
+
+ relocEntries[relocIndex] = new RelocEntry(reloc.Position + relocOffset, reloc.Symbol);
+ }
+
+ _stream.CopyTo(codeStream);
+
+ var code = codeStream.ToArray();
+ var relocInfo = new RelocInfo(relocEntries);
+
+ return (code, relocInfo);
+ }
+
+ private static bool Is64Bits(OperandType type)
+ {
+ return type == OperandType.I64 || type == OperandType.FP64;
+ }
+
+ private static bool IsImm8(ulong immediate, OperandType type)
+ {
+ long value = type == OperandType.I32 ? (int)immediate : (long)immediate;
+
+ return ConstFitsOnS8(value);
+ }
+
+ private static bool IsImm32(ulong immediate, OperandType type)
+ {
+ long value = type == OperandType.I32 ? (int)immediate : (long)immediate;
+
+ return ConstFitsOnS32(value);
+ }
+
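+ // On x86, a conditional jump is 2 bytes in its short form (0x70+cc imm8) and 6 bytes
+ // in its near form (0x0f 0x80+cc imm32); an unconditional jump is 2 bytes (0xeb imm8)
+ // or 5 bytes (0xe9 imm32). For backward jumps the offset must also cover the jump
+ // instruction itself, hence the subtraction before the range checks below.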
+ private static int GetJccLength(long offset)
+ {
+ if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset))
+ {
+ return 2;
+ }
+ else if (ConstFitsOnS32(offset < 0 ? offset - 6 : offset))
+ {
+ return 6;
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+ }
+
+ private static int GetJmpLength(long offset)
+ {
+ if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset))
+ {
+ return 2;
+ }
+ else if (ConstFitsOnS32(offset < 0 ? offset - 5 : offset))
+ {
+ return 5;
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+ }
+
+ private static bool ConstFitsOnS8(long value)
+ {
+ return value == (sbyte)value;
+ }
+
+ private static bool ConstFitsOnS32(long value)
+ {
+ return value == (int)value;
+ }
+
+ private void WriteInt16(short value)
+ {
+ WriteUInt16((ushort)value);
+ }
+
+ private void WriteInt32(int value)
+ {
+ WriteUInt32((uint)value);
+ }
+
+ private void WriteByte(byte value)
+ {
+ _stream.WriteByte(value);
+ }
+
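+ // The multi-byte writers below emit values in little-endian byte order, as x86 expects.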
+ private void WriteUInt16(ushort value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ }
+
+ private void WriteUInt32(uint value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ _stream.WriteByte((byte)(value >> 16));
+ _stream.WriteByte((byte)(value >> 24));
+ }
+
+ private void WriteUInt64(ulong value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ _stream.WriteByte((byte)(value >> 16));
+ _stream.WriteByte((byte)(value >> 24));
+ _stream.WriteByte((byte)(value >> 32));
+ _stream.WriteByte((byte)(value >> 40));
+ _stream.WriteByte((byte)(value >> 48));
+ _stream.WriteByte((byte)(value >> 56));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs b/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs
new file mode 100644
index 00000000..e6a2ff07
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs
@@ -0,0 +1,295 @@
+using System;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ partial class Assembler
+ {
+ public static bool SupportsVexPrefix(X86Instruction inst)
+ {
+ return _instTable[(int)inst].Flags.HasFlag(InstructionFlags.Vex);
+ }
+
+ private const int BadOp = 0;
+
+ [Flags]
+ private enum InstructionFlags
+ {
+ None = 0,
+ RegOnly = 1 << 0,
+ Reg8Src = 1 << 1,
+ Reg8Dest = 1 << 2,
+ RexW = 1 << 3,
+ Vex = 1 << 4,
+ Evex = 1 << 5,
+
+ PrefixBit = 16,
+ PrefixMask = 7 << PrefixBit,
+ Prefix66 = 1 << PrefixBit,
+ PrefixF3 = 2 << PrefixBit,
+ PrefixF2 = 4 << PrefixBit
+ }
+
+ private readonly struct InstructionInfo
+ {
+ public int OpRMR { get; }
+ public int OpRMImm8 { get; }
+ public int OpRMImm32 { get; }
+ public int OpRImm64 { get; }
+ public int OpRRM { get; }
+
+ public InstructionFlags Flags { get; }
+
+ public InstructionInfo(
+ int opRMR,
+ int opRMImm8,
+ int opRMImm32,
+ int opRImm64,
+ int opRRM,
+ InstructionFlags flags)
+ {
+ OpRMR = opRMR;
+ OpRMImm8 = opRMImm8;
+ OpRMImm32 = opRMImm32;
+ OpRImm64 = opRImm64;
+ OpRRM = opRRM;
+ Flags = flags;
+ }
+ }
+
+ private static readonly InstructionInfo[] _instTable;
+
+ static Assembler()
+ {
+ _instTable = new InstructionInfo[(int)X86Instruction.Count];
+
+ // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags
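+ // Note: for opcode-extension forms, the high byte of the opcode value appears to carry
+ // the ModRM reg "digit": e.g. 0x04000083 is opcode 0x83 with /4 (the group-1 AND digit),
+ // and 0x020000ff is 0xff /2 (indirect near CALL).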
+ Add(X86Instruction.Add, new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstructionFlags.None));
+ Add(X86Instruction.Addpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex));
+ Add(X86Instruction.Addsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Addss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Aesdec, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38de, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Aesdeclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38df, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Aesenc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Aesenclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Aesimc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38db, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.And, new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstructionFlags.None));
+ Add(X86Instruction.Andnpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Andnps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex));
+ Add(X86Instruction.Andpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f54, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Andps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f54, InstructionFlags.Vex));
+ Add(X86Instruction.Blendvpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3815, InstructionFlags.Prefix66));
+ Add(X86Instruction.Blendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3814, InstructionFlags.Prefix66));
+ Add(X86Instruction.Bsr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstructionFlags.None));
+ Add(X86Instruction.Bswap, new InstructionInfo(0x00000fc8, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RegOnly));
+ Add(X86Instruction.Call, new InstructionInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Cmovcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstructionFlags.None));
+ Add(X86Instruction.Cmp, new InstructionInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstructionFlags.None));
+ Add(X86Instruction.Cmppd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cmpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex));
+ Add(X86Instruction.Cmpsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cmpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cmpxchg, new InstructionInfo(0x00000fb1, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW));
+ Add(X86Instruction.Cmpxchg8, new InstructionInfo(0x00000fb0, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex));
+ Add(X86Instruction.Crc32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2));
+ Add(X86Instruction.Crc32_16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2 | InstructionFlags.Prefix66));
+ Add(X86Instruction.Crc32_8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f0, InstructionFlags.PrefixF2 | InstructionFlags.Reg8Src));
+ Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex));
+ Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtpd2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cvtps2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cvtps2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex));
+ Add(X86Instruction.Cvtsd2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsd2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsi2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsi2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtss2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtss2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Div, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstructionFlags.None));
+ Add(X86Instruction.Divpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Divps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex));
+ Add(X86Instruction.Divsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Divss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Gf2p8affineqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3ace, InstructionFlags.Prefix66));
+ Add(X86Instruction.Haddpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Haddps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Idiv, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstructionFlags.None));
+ Add(X86Instruction.Imul, new InstructionInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstructionFlags.None));
+ Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None));
+ Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Jmp, new InstructionInfo(0x040000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Ldmxcsr, new InstructionInfo(0x02000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex));
+ Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None));
+ Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex));
+ Add(X86Instruction.Maxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Maxss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Minpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Minps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex));
+ Add(X86Instruction.Minsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Minss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Mov, new InstructionInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstructionFlags.None));
+ Add(X86Instruction.Mov16, new InstructionInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstructionFlags.Prefix66));
+ Add(X86Instruction.Mov8, new InstructionInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstructionFlags.Reg8Src | InstructionFlags.Reg8Dest));
+ Add(X86Instruction.Movd, new InstructionInfo(0x00000f7e, BadOp, BadOp, BadOp, 0x00000f6e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Movdqu, new InstructionInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movhlps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstructionFlags.Vex));
+ Add(X86Instruction.Movlhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstructionFlags.Vex));
+ Add(X86Instruction.Movq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movsd, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Movss, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movsx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstructionFlags.None));
+ Add(X86Instruction.Movsx32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstructionFlags.None));
+ Add(X86Instruction.Movsx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Movzx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstructionFlags.None));
+ Add(X86Instruction.Movzx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Mul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstructionFlags.None));
+ Add(X86Instruction.Mulpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Mulps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex));
+ Add(X86Instruction.Mulsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Mulss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Neg, new InstructionInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Not, new InstructionInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Or, new InstructionInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstructionFlags.None));
+ Add(X86Instruction.Paddb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Palignr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3810, InstructionFlags.Prefix66));
+ Add(X86Instruction.Pclmulqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a44, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrb, new InstructionInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrd, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrq, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmulld, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmullw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pop, new InstructionInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Popcnt, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstructionFlags.PrefixF3));
+ Add(X86Instruction.Por, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pshufb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3800, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pshufd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pslld, new InstructionInfo(BadOp, 0x06000f72, BadOp, BadOp, 0x00000ff2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pslldq, new InstructionInfo(BadOp, 0x07000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psllq, new InstructionInfo(BadOp, 0x06000f73, BadOp, BadOp, 0x00000ff3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psllw, new InstructionInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrad, new InstructionInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psraw, new InstructionInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrld, new InstructionInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrlq, new InstructionInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrldq, new InstructionInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrlw, new InstructionInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f68, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f69, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f60, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckldq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f62, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f61, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Push, new InstructionInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstructionFlags.None));
+ Add(X86Instruction.Pxor, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Rcpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex));
+ Add(X86Instruction.Rcpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Ror, new InstructionInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Roundpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a09, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a08, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Rsqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex));
+ Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest));
+ Add(X86Instruction.Sha256Msg1, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cc, InstructionFlags.None));
+ Add(X86Instruction.Sha256Msg2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cd, InstructionFlags.None));
+ Add(X86Instruction.Sha256Rnds2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cb, InstructionFlags.None));
+ Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Shufps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex));
+ Add(X86Instruction.Sqrtpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Sqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex));
+ Add(X86Instruction.Sqrtsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Sqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Stmxcsr, new InstructionInfo(0x03000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex));
+ Add(X86Instruction.Sub, new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstructionFlags.None));
+ Add(X86Instruction.Subpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Subps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex));
+ Add(X86Instruction.Subsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Subss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Test, new InstructionInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Unpckhpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex));
+ Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex));
+ Add(X86Instruction.Vblendvpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfnmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfnmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfnmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfnmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vpternlogd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a25, InstructionFlags.Evex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
+ Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex));
+
+ static void Add(X86Instruction inst, in InstructionInfo info)
+ {
+ _instTable[(int)inst] = info;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/CallConvName.cs b/src/ARMeilleure/CodeGen/X86/CallConvName.cs
new file mode 100644
index 00000000..be367628
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CallConvName.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum CallConvName
+ {
+ SystemV,
+ Windows
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/CallingConvention.cs b/src/ARMeilleure/CodeGen/X86/CallingConvention.cs
new file mode 100644
index 00000000..953fef5b
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CallingConvention.cs
@@ -0,0 +1,158 @@
+using System;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class CallingConvention
+ {
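+ // Register sets are represented as 16-bit masks, one bit per register index
+ // (16 GPRs and 16 XMM registers on x86-64).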
+ private const int RegistersMask = 0xffff;
+
+ public static int GetIntAvailableRegisters()
+ {
+ return RegistersMask & ~(1 << (int)X86Register.Rsp);
+ }
+
+ public static int GetVecAvailableRegisters()
+ {
+ return RegistersMask;
+ }
+
+ public static int GetIntCallerSavedRegisters()
+ {
+ if (GetCurrentCallConv() == CallConvName.Windows)
+ {
+ return (1 << (int)X86Register.Rax) |
+ (1 << (int)X86Register.Rcx) |
+ (1 << (int)X86Register.Rdx) |
+ (1 << (int)X86Register.R8) |
+ (1 << (int)X86Register.R9) |
+ (1 << (int)X86Register.R10) |
+ (1 << (int)X86Register.R11);
+ }
+ else /* if (GetCurrentCallConv() == CallConvName.SystemV) */
+ {
+ return (1 << (int)X86Register.Rax) |
+ (1 << (int)X86Register.Rcx) |
+ (1 << (int)X86Register.Rdx) |
+ (1 << (int)X86Register.Rsi) |
+ (1 << (int)X86Register.Rdi) |
+ (1 << (int)X86Register.R8) |
+ (1 << (int)X86Register.R9) |
+ (1 << (int)X86Register.R10) |
+ (1 << (int)X86Register.R11);
+ }
+ }
+
+ public static int GetVecCallerSavedRegisters()
+ {
+ if (GetCurrentCallConv() == CallConvName.Windows)
+ {
+ return (1 << (int)X86Register.Xmm0) |
+ (1 << (int)X86Register.Xmm1) |
+ (1 << (int)X86Register.Xmm2) |
+ (1 << (int)X86Register.Xmm3) |
+ (1 << (int)X86Register.Xmm4) |
+ (1 << (int)X86Register.Xmm5);
+ }
+ else /* if (GetCurrentCallConv() == CallConvName.SystemV) */
+ {
+ return RegistersMask;
+ }
+ }
+
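+ // The callee-saved sets are simply the complement of the caller-saved sets
+ // within the 16-register mask.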
+ public static int GetIntCalleeSavedRegisters()
+ {
+ return GetIntCallerSavedRegisters() ^ RegistersMask;
+ }
+
+ public static int GetVecCalleeSavedRegisters()
+ {
+ return GetVecCallerSavedRegisters() ^ RegistersMask;
+ }
+
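+ // Windows x64 passes the first 4 arguments of any kind in registers, while System V
+ // passes up to 6 integer and 8 vector arguments in registers; the counts below
+ // reflect that split.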
+ public static int GetArgumentsOnRegsCount()
+ {
+ return 4;
+ }
+
+ public static int GetIntArgumentsOnRegsCount()
+ {
+ return 6;
+ }
+
+ public static int GetVecArgumentsOnRegsCount()
+ {
+ return 8;
+ }
+
+ public static X86Register GetIntArgumentRegister(int index)
+ {
+ if (GetCurrentCallConv() == CallConvName.Windows)
+ {
+ switch (index)
+ {
+ case 0: return X86Register.Rcx;
+ case 1: return X86Register.Rdx;
+ case 2: return X86Register.R8;
+ case 3: return X86Register.R9;
+ }
+ }
+ else /* if (GetCurrentCallConv() == CallConvName.SystemV) */
+ {
+ switch (index)
+ {
+ case 0: return X86Register.Rdi;
+ case 1: return X86Register.Rsi;
+ case 2: return X86Register.Rdx;
+ case 3: return X86Register.Rcx;
+ case 4: return X86Register.R8;
+ case 5: return X86Register.R9;
+ }
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ public static X86Register GetVecArgumentRegister(int index)
+ {
+ int count;
+
+ if (GetCurrentCallConv() == CallConvName.Windows)
+ {
+ count = 4;
+ }
+ else /* if (GetCurrentCallConv() == CallConvName.SystemV) */
+ {
+ count = 8;
+ }
+
+ if ((uint)index < count)
+ {
+ return X86Register.Xmm0 + index;
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ public static X86Register GetIntReturnRegister()
+ {
+ return X86Register.Rax;
+ }
+
+ public static X86Register GetIntReturnRegisterHigh()
+ {
+ return X86Register.Rdx;
+ }
+
+ public static X86Register GetVecReturnRegister()
+ {
+ return X86Register.Xmm0;
+ }
+
+ public static CallConvName GetCurrentCallConv()
+ {
+ return OperatingSystem.IsWindows()
+ ? CallConvName.Windows
+ : CallConvName.SystemV;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs b/src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs
new file mode 100644
index 00000000..237ecee4
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs
@@ -0,0 +1,19 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class CodeGenCommon
+ {
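+ // A constant is "long" when it cannot be encoded as a sign-extended 32-bit immediate,
+ // the widest immediate form most x86-64 instructions accept.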
+ public static bool IsLongConst(Operand op)
+ {
+ long value = op.Type == OperandType.I32 ? op.AsInt32() : op.AsInt64();
+
+ return !ConstFitsOnS32(value);
+ }
+
+ private static bool ConstFitsOnS32(long value)
+ {
+ return value == (int)value;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/src/ARMeilleure/CodeGen/X86/CodeGenContext.cs
new file mode 100644
index 00000000..89948724
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CodeGenContext.cs
@@ -0,0 +1,105 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using Ryujinx.Common.Memory;
+using System.IO;
+using System.Numerics;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class CodeGenContext
+ {
+ private readonly Stream _stream;
+ private readonly Operand[] _blockLabels;
+
+ public int StreamOffset => (int)_stream.Length;
+
+ public AllocationResult AllocResult { get; }
+
+ public Assembler Assembler { get; }
+ public BasicBlock CurrBlock { get; private set; }
+
+ public int CallArgsRegionSize { get; }
+ public int XmmSaveRegionSize { get; }
+
+ public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
+ {
+ _stream = MemoryStreamManager.Shared.GetStream();
+ _blockLabels = new Operand[blocksCount];
+
+ AllocResult = allocResult;
+ Assembler = new Assembler(_stream, relocatable);
+
+ CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int xmmSaveRegionSize);
+ XmmSaveRegionSize = xmmSaveRegionSize;
+ }
+
+ private static int GetCallArgsRegionSize(AllocationResult allocResult, int maxCallArgs, out int xmmSaveRegionSize)
+ {
+ // We need to add 8 bytes to the total size, as the call to this function already pushed 8 bytes (the
+ // return address).
+ int intMask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters;
+ int vecMask = CallingConvention.GetVecCalleeSavedRegisters() & allocResult.VecUsedRegisters;
+
+ xmmSaveRegionSize = BitOperations.PopCount((uint)vecMask) * 16;
+
+ int calleeSaveRegionSize = BitOperations.PopCount((uint)intMask) * 8 + xmmSaveRegionSize + 8;
+
+ int argsCount = maxCallArgs;
+
+ if (argsCount < 0)
+ {
+ // When the function has no calls, argsCount is -1. In this case, we don't need to allocate the shadow
+ // space.
+ argsCount = 0;
+ }
+ else if (argsCount < 4)
+ {
+ // The Windows x64 ABI mandates that space for at least 4 arguments is reserved
+ // on the stack (the so-called shadow space).
+ argsCount = 4;
+ }
+
+ // TODO: Align XMM save region to 16 bytes because unwinding on Windows requires it.
+ int frameSize = calleeSaveRegionSize + allocResult.SpillRegionSize;
+
+ // TODO: Instead of always multiplying by 16 (the largest possible size of a variable, since a V128 has 16
+ // bytes), we should calculate the exact size consumed by the arguments passed to the called functions on
+ // the stack.
+ int callArgsAndFrameSize = frameSize + argsCount * 16;
+
+ // Ensure that the Stack Pointer will be aligned to 16 bytes.
+ callArgsAndFrameSize = (callArgsAndFrameSize + 0xf) & ~0xf;
+
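+ // Illustrative example: with a 40-byte frame and a call taking 4 register-sized
+ // arguments, 40 + 4 * 16 = 104 rounds up to 112, so 72 bytes are reserved here.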
+ return callArgsAndFrameSize - frameSize;
+ }
+
+ public void EnterBlock(BasicBlock block)
+ {
+ Assembler.MarkLabel(GetLabel(block));
+
+ CurrBlock = block;
+ }
+
+ public void JumpTo(BasicBlock target)
+ {
+ Assembler.Jmp(GetLabel(target));
+ }
+
+ public void JumpTo(X86Condition condition, BasicBlock target)
+ {
+ Assembler.Jcc(condition, GetLabel(target));
+ }
+
+ private Operand GetLabel(BasicBlock block)
+ {
+ ref Operand label = ref _blockLabels[block.Index];
+
+ if (label == default)
+ {
+ label = Operand.Factory.Label();
+ }
+
+ return label;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs
new file mode 100644
index 00000000..e7179b51
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -0,0 +1,1865 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class CodeGenerator
+ {
+ private const int RegistersCount = 16;
+ private const int PageSize = 0x1000;
+ private const int StackGuardSize = 0x2000;
+
+ private static readonly Action<CodeGenContext, Operation>[] _instTable;
+
+ static CodeGenerator()
+ {
+ _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))];
+
+ Add(Instruction.Add, GenerateAdd);
+ Add(Instruction.BitwiseAnd, GenerateBitwiseAnd);
+ Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
+ Add(Instruction.BitwiseNot, GenerateBitwiseNot);
+ Add(Instruction.BitwiseOr, GenerateBitwiseOr);
+ Add(Instruction.BranchIf, GenerateBranchIf);
+ Add(Instruction.ByteSwap, GenerateByteSwap);
+ Add(Instruction.Call, GenerateCall);
+ Add(Instruction.Clobber, GenerateClobber);
+ Add(Instruction.Compare, GenerateCompare);
+ Add(Instruction.CompareAndSwap, GenerateCompareAndSwap);
+ Add(Instruction.CompareAndSwap16, GenerateCompareAndSwap16);
+ Add(Instruction.CompareAndSwap8, GenerateCompareAndSwap8);
+ Add(Instruction.ConditionalSelect, GenerateConditionalSelect);
+ Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32);
+ Add(Instruction.ConvertToFP, GenerateConvertToFP);
+ Add(Instruction.Copy, GenerateCopy);
+ Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
+ Add(Instruction.Divide, GenerateDivide);
+ Add(Instruction.DivideUI, GenerateDivideUI);
+ Add(Instruction.Fill, GenerateFill);
+ Add(Instruction.Load, GenerateLoad);
+ Add(Instruction.Load16, GenerateLoad16);
+ Add(Instruction.Load8, GenerateLoad8);
+ Add(Instruction.MemoryBarrier, GenerateMemoryBarrier);
+ Add(Instruction.Multiply, GenerateMultiply);
+ Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI);
+ Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI);
+ Add(Instruction.Negate, GenerateNegate);
+ Add(Instruction.Return, GenerateReturn);
+ Add(Instruction.RotateRight, GenerateRotateRight);
+ Add(Instruction.ShiftLeft, GenerateShiftLeft);
+ Add(Instruction.ShiftRightSI, GenerateShiftRightSI);
+ Add(Instruction.ShiftRightUI, GenerateShiftRightUI);
+ Add(Instruction.SignExtend16, GenerateSignExtend16);
+ Add(Instruction.SignExtend32, GenerateSignExtend32);
+ Add(Instruction.SignExtend8, GenerateSignExtend8);
+ Add(Instruction.Spill, GenerateSpill);
+ Add(Instruction.SpillArg, GenerateSpillArg);
+ Add(Instruction.StackAlloc, GenerateStackAlloc);
+ Add(Instruction.Store, GenerateStore);
+ Add(Instruction.Store16, GenerateStore16);
+ Add(Instruction.Store8, GenerateStore8);
+ Add(Instruction.Subtract, GenerateSubtract);
+ Add(Instruction.Tailcall, GenerateTailcall);
+ Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar);
+ Add(Instruction.VectorExtract, GenerateVectorExtract);
+ Add(Instruction.VectorExtract16, GenerateVectorExtract16);
+ Add(Instruction.VectorExtract8, GenerateVectorExtract8);
+ Add(Instruction.VectorInsert, GenerateVectorInsert);
+ Add(Instruction.VectorInsert16, GenerateVectorInsert16);
+ Add(Instruction.VectorInsert8, GenerateVectorInsert8);
+ Add(Instruction.VectorOne, GenerateVectorOne);
+ Add(Instruction.VectorZero, GenerateVectorZero);
+ Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64);
+ Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96);
+ Add(Instruction.ZeroExtend16, GenerateZeroExtend16);
+ Add(Instruction.ZeroExtend32, GenerateZeroExtend32);
+ Add(Instruction.ZeroExtend8, GenerateZeroExtend8);
+
+ static void Add(Instruction inst, Action<CodeGenContext, Operation> func)
+ {
+ _instTable[(int)inst] = func;
+ }
+ }
+
+ public static CompiledFunction Generate(CompilerContext cctx)
+ {
+ ControlFlowGraph cfg = cctx.Cfg;
+
+ Logger.StartPass(PassName.Optimization);
+
+ if (cctx.Options.HasFlag(CompilerOptions.Optimize))
+ {
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Optimizer.RunPass(cfg);
+ }
+
+ BlockPlacement.RunPass(cfg);
+ }
+
+ X86Optimizer.RunPass(cfg);
+
+ Logger.EndPass(PassName.Optimization, cfg);
+
+ Logger.StartPass(PassName.PreAllocation);
+
+ StackAllocator stackAlloc = new();
+
+ PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs);
+
+ Logger.EndPass(PassName.PreAllocation, cfg);
+
+ Logger.StartPass(PassName.RegisterAllocation);
+
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Ssa.Deconstruct(cfg);
+ }
+
+ IRegisterAllocator regAlloc;
+
+ if (cctx.Options.HasFlag(CompilerOptions.Lsra))
+ {
+ regAlloc = new LinearScanAllocator();
+ }
+ else
+ {
+ regAlloc = new HybridAllocator();
+ }
+
+ RegisterMasks regMasks = new(
+ CallingConvention.GetIntAvailableRegisters(),
+ CallingConvention.GetVecAvailableRegisters(),
+ CallingConvention.GetIntCallerSavedRegisters(),
+ CallingConvention.GetVecCallerSavedRegisters(),
+ CallingConvention.GetIntCalleeSavedRegisters(),
+ CallingConvention.GetVecCalleeSavedRegisters(),
+ RegistersCount);
+
+ AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
+
+ Logger.EndPass(PassName.RegisterAllocation, cfg);
+
+ Logger.StartPass(PassName.CodeGeneration);
+
+ bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
+
+ CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);
+
+ UnwindInfo unwindInfo = WritePrologue(context);
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ context.EnterBlock(block);
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ GenerateOperation(context, node);
+ }
+
+ if (block.SuccessorsCount == 0)
+ {
+ // The only blocks that can have zero successors are exit blocks.
+ Operation last = block.Operations.Last;
+
+ Debug.Assert(last.Instruction == Instruction.Tailcall ||
+ last.Instruction == Instruction.Return);
+ }
+ else
+ {
+ BasicBlock succ = block.GetSuccessor(0);
+
+ if (succ != block.ListNext)
+ {
+ context.JumpTo(succ);
+ }
+ }
+ }
+
+ (byte[] code, RelocInfo relocInfo) = context.Assembler.GetCode();
+
+ Logger.EndPass(PassName.CodeGeneration);
+
+ return new CompiledFunction(code, unwindInfo, relocInfo);
+ }
+
+ private static void GenerateOperation(CodeGenContext context, Operation operation)
+ {
+ if (operation.Instruction == Instruction.Extended)
+ {
+ IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);
+
+ switch (info.Type)
+ {
+ case IntrinsicType.Comis_:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
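+ // COMISD/COMISS set the flags like an unsigned compare, so "ge" and "lt" map to
+ // the AboveOrEqual and Below conditions rather than the signed ones.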
+ switch (operation.Intrinsic)
+ {
+ case Intrinsic.X86Comisdeq:
+ context.Assembler.Comisd(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.Equal);
+ break;
+
+ case Intrinsic.X86Comisdge:
+ context.Assembler.Comisd(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.AboveOrEqual);
+ break;
+
+ case Intrinsic.X86Comisdlt:
+ context.Assembler.Comisd(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.Below);
+ break;
+
+ case Intrinsic.X86Comisseq:
+ context.Assembler.Comiss(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.Equal);
+ break;
+
+ case Intrinsic.X86Comissge:
+ context.Assembler.Comiss(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.AboveOrEqual);
+ break;
+
+ case Intrinsic.X86Comisslt:
+ context.Assembler.Comiss(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.Below);
+ break;
+ }
+
+ context.Assembler.Movzx8(dest, dest, OperandType.I32);
+
+ break;
+ }
+
+ case IntrinsicType.Mxcsr:
+ {
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+ Debug.Assert(offset.Type == OperandType.I32);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize;
+
+ Operand rsp = Register(X86Register.Rsp);
+ Operand memOp = MemoryOp(OperandType.I32, rsp, default, Multiplier.x1, offs);
+
+ Debug.Assert(HardwareCapabilities.SupportsSse || HardwareCapabilities.SupportsVexEncoding);
+
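+ // LDMXCSR and STMXCSR only accept a memory operand, so the value is
+ // round-tripped through a scratch slot on the stack.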
+ if (operation.Intrinsic == Intrinsic.X86Ldmxcsr)
+ {
+ Operand bits = operation.GetSource(1);
+ Debug.Assert(bits.Type == OperandType.I32);
+
+ context.Assembler.Mov(memOp, bits, OperandType.I32);
+ context.Assembler.Ldmxcsr(memOp);
+ }
+ else if (operation.Intrinsic == Intrinsic.X86Stmxcsr)
+ {
+ Operand dest = operation.Destination;
+ Debug.Assert(dest.Type == OperandType.I32);
+
+ context.Assembler.Stmxcsr(memOp);
+ context.Assembler.Mov(dest, memOp, OperandType.I32);
+ }
+
+ break;
+ }
+
+ case IntrinsicType.PopCount:
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Popcnt(dest, source, dest.Type);
+
+ break;
+ }
+
+ case IntrinsicType.Unary:
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ context.Assembler.WriteInstruction(info.Inst, dest, source);
+
+ break;
+ }
+
+ case IntrinsicType.UnaryToGpr:
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger());
+
+ if (operation.Intrinsic == Intrinsic.X86Cvtsi2si)
+ {
+ if (dest.Type == OperandType.I32)
+ {
+ context.Assembler.Movd(dest, source); // int _mm_cvtsi128_si32(__m128i a)
+ }
+ else /* if (dest.Type == OperandType.I64) */
+ {
+ context.Assembler.Movq(dest, source); // __int64 _mm_cvtsi128_si64(__m128i a)
+ }
+ }
+ else
+ {
+ context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
+ }
+
+ break;
+ }
+
+ case IntrinsicType.Binary:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(!dest.Type.IsInteger());
+ Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
+
+ break;
+ }
+
+ case IntrinsicType.BinaryGpr:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(!dest.Type.IsInteger() && src2.Type.IsInteger());
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src2.Type);
+
+ break;
+ }
+
+ case IntrinsicType.Crc32:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(dest.Type.IsInteger() && src1.Type.IsInteger() && src2.Type.IsInteger());
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src2, dest.Type);
+
+ break;
+ }
+
+ case IntrinsicType.BinaryImm:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2.AsByte());
+
+ break;
+ }
+
+ case IntrinsicType.Ternary:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(dest, src1, src2, src3);
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ if (info.Inst == X86Instruction.Blendvpd && HardwareCapabilities.SupportsVexEncoding)
+ {
+ context.Assembler.WriteInstruction(X86Instruction.Vblendvpd, dest, src1, src2, src3);
+ }
+ else if (info.Inst == X86Instruction.Blendvps && HardwareCapabilities.SupportsVexEncoding)
+ {
+ context.Assembler.WriteInstruction(X86Instruction.Vblendvps, dest, src1, src2, src3);
+ }
+ else if (info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding)
+ {
+ context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, dest, src1, src2, src3);
+ }
+ else
+ {
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(src3.GetRegister().Index == 0);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
+ }
+
+ break;
+ }
+
+ case IntrinsicType.TernaryImm:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(dest, src1, src2);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte());
+
+ break;
+ }
+
+ case IntrinsicType.Fma:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
+
+ Debug.Assert(dest.Kind == OperandKind.Register && src1.Kind == OperandKind.Register && src2.Kind == OperandKind.Register);
+ Debug.Assert(src3.Kind == OperandKind.Register || src3.Kind == OperandKind.Memory);
+
+ EnsureSameType(dest, src1, src2, src3);
+ Debug.Assert(dest.Type == OperandType.V128);
+
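+ // The 231 FMA forms use the destination as the accumulator, so the IR
+ // accumulator (src1) must have been allocated to the same register as dest.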
+ Debug.Assert(dest.Value == src1.Value);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src2, src3);
+
+ break;
+ }
+ }
+ }
+ else
+ {
+ Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction];
+
+ if (func != null)
+ {
+ func(context, operation);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
+ }
+ }
+ }
+
+ private static void GenerateAdd(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ if (dest.Type.IsInteger())
+ {
+                // If the destination and first source operands are the same, perform a
+                // standard add, since LEA offers no benefit in that case.
+ if (dest.Kind == src1.Kind && dest.Value == src1.Value)
+ {
+ ValidateBinOp(dest, src1, src2);
+
+ context.Assembler.Add(dest, src2, dest.Type);
+ }
+ else
+ {
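+                // Otherwise, use LEA to write the sum to a different register in a
+                // single instruction, e.g. "lea rax, [rcx+rdx]" or "lea rax, [rcx+16]".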
+ EnsureSameType(dest, src1, src2);
+
+ int offset;
+ Operand index;
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ offset = src2.AsInt32();
+ index = default;
+ }
+ else
+ {
+ offset = 0;
+ index = src2;
+ }
+
+ Operand memOp = MemoryOp(dest.Type, src1, index, Multiplier.x1, offset);
+
+ context.Assembler.Lea(dest, memOp, dest.Type);
+ }
+ }
+ else
+ {
+ ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Addss(dest, src1, src2);
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ context.Assembler.Addsd(dest, src1, src2);
+ }
+ }
+ }
+
+ private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ // Note: GenerateCompareCommon makes the assumption that BitwiseAnd will emit only a single `and`
+ // instruction.
+ context.Assembler.And(dest, src2, dest.Type);
+ }
+
+ private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Xor(dest, src2, dest.Type);
+ }
+ else
+ {
+ context.Assembler.Xorps(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Not(dest);
+ }
+
+ private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Or(dest, src2, dest.Type);
+ }
+
+ private static void GenerateBranchIf(CodeGenContext context, Operation operation)
+ {
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var cond = ((Comparison)comp.AsInt32()).ToX86Condition();
+
+ GenerateCompareCommon(context, operation);
+
+ context.JumpTo(cond, context.CurrBlock.GetSuccessor(1));
+ }
+
+ private static void GenerateByteSwap(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Bswap(dest);
+ }
+
+ private static void GenerateCall(CodeGenContext context, Operation operation)
+ {
+ context.Assembler.Call(operation.GetSource(0));
+ }
+
+ private static void GenerateClobber(CodeGenContext context, Operation operation)
+ {
+            // This is only used to indicate to the register allocator that a register
+            // is clobbered; we don't need to produce any code for it.
+ }
+
+ private static void GenerateCompare(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(dest.Type == OperandType.I32);
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var cond = ((Comparison)comp.AsInt32()).ToX86Condition();
+
+ GenerateCompareCommon(context, operation);
+
+ context.Assembler.Setcc(dest, cond);
+ context.Assembler.Movzx8(dest, dest, OperandType.I32);
+ }
+
+ private static void GenerateCompareCommon(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(src1, src2);
+
+ Debug.Assert(src1.Type.IsInteger());
+
+ if (src2.Kind == OperandKind.Constant && src2.Value == 0)
+ {
+ if (MatchOperation(operation.ListPrevious, Instruction.BitwiseAnd, src1.Type, src1.GetRegister()))
+ {
+                // Since the `test` and `and` instructions set the status flags in the same way, we can omit the
+                // `test r,r` instruction when it is immediately preceded by an `and r,*` instruction.
+ //
+ // For example:
+ //
+ // and eax, 0x3
+ // test eax, eax
+ // jz .L0
+ //
+ // =>
+ //
+ // and eax, 0x3
+ // jz .L0
+ }
+ else
+ {
+ context.Assembler.Test(src1, src1, src1.Type);
+ }
+ }
+ else
+ {
+ context.Assembler.Cmp(src1, src2, src1.Type);
+ }
+ }
+
+ private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+
+ if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3.
+ {
+ Operand memOp = MemoryOp(OperandType.I64, src1);
+
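+                // CMPXCHG16B implicitly compares RDX:RAX against the memory operand
+                // and, on success, stores RCX:RBX into it; only the memory operand is
+                // explicit here.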
+ context.Assembler.Cmpxchg16b(memOp);
+ }
+ else
+ {
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(src2, src3);
+
+ Operand memOp = MemoryOp(src3.Type, src1);
+
+ context.Assembler.Cmpxchg(memOp, src3);
+ }
+ }
+
+ private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(src2, src3);
+
+ Operand memOp = MemoryOp(src3.Type, src1);
+
+ context.Assembler.Cmpxchg16(memOp, src3);
+ }
+
+ private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(src2, src3);
+
+ Operand memOp = MemoryOp(src3.Type, src1);
+
+ context.Assembler.Cmpxchg8(memOp, src3);
+ }
+
+ private static void GenerateConditionalSelect(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameReg (dest, src3);
+ EnsureSameType(dest, src2, src3);
+
+ Debug.Assert(dest.Type.IsInteger());
+ Debug.Assert(src1.Type == OperandType.I32);
+
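+            // dest already holds src3 (the result for src1 == 0); CMOVNE then
+            // replaces it with src2 when src1 is non-zero.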
+ context.Assembler.Test (src1, src1, src1.Type);
+ context.Assembler.Cmovcc(dest, src2, dest.Type, X86Condition.NotEqual);
+ }
+
+ private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64);
+
+ context.Assembler.Mov(dest, source, OperandType.I32);
+ }
+
+ private static void GenerateConvertToFP(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64);
+
+ if (dest.Type == OperandType.FP32)
+ {
+ Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP64);
+
+ if (source.Type.IsInteger())
+ {
+ context.Assembler.Xorps (dest, dest, dest);
+ context.Assembler.Cvtsi2ss(dest, dest, source, source.Type);
+ }
+ else /* if (source.Type == OperandType.FP64) */
+ {
+ context.Assembler.Cvtsd2ss(dest, dest, source);
+
+ GenerateZeroUpper96(context, dest, dest);
+ }
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP32);
+
+ if (source.Type.IsInteger())
+ {
+ context.Assembler.Xorps (dest, dest, dest);
+ context.Assembler.Cvtsi2sd(dest, dest, source, source.Type);
+ }
+ else /* if (source.Type == OperandType.FP32) */
+ {
+ context.Assembler.Cvtss2sd(dest, dest, source);
+
+ GenerateZeroUpper64(context, dest, dest);
+ }
+ }
+ }
+
+ private static void GenerateCopy(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant);
+
+ // Moves to the same register are useless.
+ if (dest.Kind == source.Kind && dest.Value == source.Value)
+ {
+ return;
+ }
+
+ if (dest.Kind == OperandKind.Register &&
+ source.Kind == OperandKind.Constant && source.Value == 0)
+ {
+                // Assemble "mov reg, 0" as "xor reg, reg", as the latter is more efficient.
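+                // Writing the 32-bit register form also zero-extends into the upper
+                // 32 bits, so OperandType.I32 suffices even for 64-bit destinations.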
+ context.Assembler.Xor(dest, dest, OperandType.I32);
+ }
+ else if (dest.Type.IsInteger())
+ {
+ context.Assembler.Mov(dest, source, dest.Type);
+ }
+ else
+ {
+ context.Assembler.Movdqu(dest, source);
+ }
+ }
+
+ private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Bsr(dest, source, dest.Type);
+
+ int operandSize = dest.Type == OperandType.I32 ? 32 : 64;
+ int operandMask = operandSize - 1;
+
+            // When the input operand is 0, the result of BSR is undefined, but the
+            // ZF flag is set. We are supposed to return the operand size in that
+            // case, so add a conditional branch: when the input was zero we fall
+            // through and load a constant that the XOR below turns into the operand size.
+ Operand neLabel = Label();
+
+ context.Assembler.Jcc(X86Condition.NotEqual, neLabel);
+
+ context.Assembler.Mov(dest, Const(operandSize | operandMask), OperandType.I32);
+
+ context.Assembler.MarkLabel(neLabel);
+
+            // BSR returns the zero-based index of the highest set bit, counting from
+            // the least significant bit. However, we are supposed to return the number
+            // of 0 bits at the high end, so we invert the result of the BSR using XOR
+            // to get the correct value.
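+            // For example, on a 32-bit input of 0x0000_0010, BSR yields 4 and
+            // 4 ^ 31 = 27, the number of leading zeros.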
+ context.Assembler.Xor(dest, Const(operandMask), OperandType.I32);
+ }
+
+ private static void GenerateDivide(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand dividend = operation.GetSource(0);
+ Operand divisor = operation.GetSource(1);
+
+ if (!dest.Type.IsInteger())
+ {
+ ValidateBinOp(dest, dividend, divisor);
+ }
+
+ if (dest.Type.IsInteger())
+ {
+ divisor = operation.GetSource(2);
+
+ EnsureSameType(dest, divisor);
+
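+            // IDIV divides RDX:RAX (or EDX:EAX) by the divisor, so the dividend is
+            // sign-extended into RDX/EDX first with CQO/CDQ.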
+ if (divisor.Type == OperandType.I32)
+ {
+ context.Assembler.Cdq();
+ }
+ else
+ {
+ context.Assembler.Cqo();
+ }
+
+ context.Assembler.Idiv(divisor);
+ }
+ else if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Divss(dest, dividend, divisor);
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ context.Assembler.Divsd(dest, dividend, divisor);
+ }
+ }
+
+ private static void GenerateDivideUI(CodeGenContext context, Operation operation)
+ {
+ Operand divisor = operation.GetSource(2);
+
+ Operand rdx = Register(X86Register.Rdx);
+
+ Debug.Assert(divisor.Type.IsInteger());
+
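+            // DIV divides RDX:RAX by the operand; clearing RDX makes the dividend
+            // the zero-extended value in RAX.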
+ context.Assembler.Xor(rdx, rdx, OperandType.I32);
+ context.Assembler.Div(divisor);
+ }
+
+ private static void GenerateFill(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize;
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ Operand memOp = MemoryOp(dest.Type, rsp, default, Multiplier.x1, offs);
+
+ GenerateLoad(context, memOp, dest);
+ }
+
+ private static void GenerateLoad(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ GenerateLoad(context, address, value);
+ }
+
+ private static void GenerateLoad16(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.Movzx16(value, address, value.Type);
+ }
+
+ private static void GenerateLoad8(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.Movzx8(value, address, value.Type);
+ }
+
+ private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation)
+ {
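+            // A locked OR of 0 into [rsp] acts as a full memory barrier; it is
+            // commonly preferred over MFENCE as it tends to be faster.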
+ context.Assembler.LockOr(MemoryOp(OperandType.I64, Register(X86Register.Rsp)), Const(0), OperandType.I32);
+ }
+
+ private static void GenerateMultiply(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ if (src2.Kind != OperandKind.Constant)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ EnsureSameType(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ if (src2.Kind == OperandKind.Constant)
+ {
+ context.Assembler.Imul(dest, src1, src2, dest.Type);
+ }
+ else
+ {
+ context.Assembler.Imul(dest, src2, dest.Type);
+ }
+ }
+ else if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Mulss(dest, src1, src2);
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ context.Assembler.Mulsd(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation)
+ {
+ Operand source = operation.GetSource(1);
+
+ Debug.Assert(source.Type == OperandType.I64);
+
+ context.Assembler.Imul(source);
+ }
+
+ private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation)
+ {
+ Operand source = operation.GetSource(1);
+
+ Debug.Assert(source.Type == OperandType.I64);
+
+ context.Assembler.Mul(source);
+ }
+
+ private static void GenerateNegate(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Neg(dest);
+ }
+
+ private static void GenerateReturn(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Return();
+ }
+
+ private static void GenerateRotateRight(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Ror(dest, src2, dest.Type);
+ }
+
+ private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Shl(dest, src2, dest.Type);
+ }
+
+ private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Sar(dest, src2, dest.Type);
+ }
+
+ private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Shr(dest, src2, dest.Type);
+ }
+
+ private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movsx16(dest, source, dest.Type);
+ }
+
+ private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movsx32(dest, source, dest.Type);
+ }
+
+ private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movsx8(dest, source, dest.Type);
+ }
+
+ private static void GenerateSpill(CodeGenContext context, Operation operation)
+ {
+ GenerateSpill(context, operation, context.CallArgsRegionSize);
+ }
+
+ private static void GenerateSpillArg(CodeGenContext context, Operation operation)
+ {
+ GenerateSpill(context, operation, 0);
+ }
+
+ private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
+ {
+ Operand offset = operation.GetSource(0);
+ Operand source = operation.GetSource(1);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + baseOffset;
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ Operand memOp = MemoryOp(source.Type, rsp, default, Multiplier.x1, offs);
+
+ GenerateStore(context, memOp, source);
+ }
+
+ private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize;
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ Operand memOp = MemoryOp(OperandType.I64, rsp, default, Multiplier.x1, offs);
+
+ context.Assembler.Lea(dest, memOp, OperandType.I64);
+ }
+
+ private static void GenerateStore(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ GenerateStore(context, address, value);
+ }
+
+ private static void GenerateStore16(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.Mov16(address, value);
+ }
+
+ private static void GenerateStore8(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.Mov8(address, value);
+ }
+
+ private static void GenerateSubtract(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Sub(dest, src2, dest.Type);
+ }
+ else if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Subss(dest, src1, src2);
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ context.Assembler.Subsd(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateTailcall(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Jmp(operation.GetSource(0));
+ }
+
+ private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger());
+
+ if (source.Type == OperandType.I32)
+ {
+ context.Assembler.Movd(dest, source); // (__m128i _mm_cvtsi32_si128(int a))
+ }
+ else /* if (source.Type == OperandType.I64) */
+ {
+ context.Assembler.Movq(dest, source); // (__m128i _mm_cvtsi64_si128(__int64 a))
+ }
+ }
+
+ private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
+ {
+            Operand dest = operation.Destination; // Value
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes());
+
+ if (dest.Type == OperandType.I32)
+ {
+ if (index == 0)
+ {
+ context.Assembler.Movd(dest, src1);
+ }
+ else if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pextrd(dest, src1, index);
+ }
+ else
+ {
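+                    // Without SSE4.1, rotate the desired lane into position 0 with
+                    // PSHUFD, extract it with MOVD, then rotate back to restore src1.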
+ int mask0 = 0b11_10_01_00;
+ int mask1 = 0b11_10_01_00;
+
+ mask0 = BitUtils.RotateRight(mask0, index * 2, 8);
+ mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8);
+
+ context.Assembler.Pshufd(src1, src1, (byte)mask0);
+ context.Assembler.Movd (dest, src1);
+ context.Assembler.Pshufd(src1, src1, (byte)mask1);
+ }
+ }
+ else if (dest.Type == OperandType.I64)
+ {
+ if (index == 0)
+ {
+ context.Assembler.Movq(dest, src1);
+ }
+ else if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pextrq(dest, src1, index);
+ }
+ else
+ {
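+                    // 0b01_00_11_10 swaps the two 64-bit halves; applying it again
+                    // after the MOVQ restores src1.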
+ const byte mask = 0b01_00_11_10;
+
+ context.Assembler.Pshufd(src1, src1, mask);
+ context.Assembler.Movq (dest, src1);
+ context.Assembler.Pshufd(src1, src1, mask);
+ }
+ }
+ else
+ {
+ // Floating-point types.
+ if ((index >= 2 && dest.Type == OperandType.FP32) ||
+ (index == 1 && dest.Type == OperandType.FP64))
+ {
+ context.Assembler.Movhlps(dest, dest, src1);
+ context.Assembler.Movq (dest, dest);
+ }
+ else
+ {
+ context.Assembler.Movq(dest, src1);
+ }
+
+ if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Pshufd(dest, dest, (byte)(0xfc | (index & 1)));
+ }
+ }
+ }
+
+ private static void GenerateVectorExtract16(CodeGenContext context, Operation operation)
+ {
+            Operand dest = operation.Destination; // Value
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < 8);
+
+ context.Assembler.Pextrw(dest, src1, index);
+ }
+
+ private static void GenerateVectorExtract8(CodeGenContext context, Operation operation)
+ {
+            Operand dest = operation.Destination; // Value
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < 16);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pextrb(dest, src1, index);
+ }
+ else
+ {
+ context.Assembler.Pextrw(dest, src1, (byte)(index >> 1));
+
+ if ((index & 1) != 0)
+ {
+ context.Assembler.Shr(dest, Const(8), OperandType.I32);
+ }
+ else
+ {
+ context.Assembler.Movzx8(dest, dest, OperandType.I32);
+ }
+ }
+ }
+
+ private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ void InsertIntSse2(int words)
+ {
+ if (dest.GetRegister() != src1.GetRegister())
+ {
+ context.Assembler.Movdqu(dest, src1);
+ }
+
+ for (int word = 0; word < words; word++)
+ {
+                    // Insert the lower 16 bits.
+ context.Assembler.Pinsrw(dest, dest, src2, (byte)(index * words + word));
+
+ // Move next word down.
+ context.Assembler.Ror(src2, Const(16), src2.Type);
+ }
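+
+                // After the loop, src2 has been rotated through its full width
+                // (16 bits x words) and is back at its original value.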
+ }
+
+ if (src2.Type == OperandType.I32)
+ {
+ Debug.Assert(index < 4);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pinsrd(dest, src1, src2, index);
+ }
+ else
+ {
+ InsertIntSse2(2);
+ }
+ }
+ else if (src2.Type == OperandType.I64)
+ {
+ Debug.Assert(index < 2);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pinsrq(dest, src1, src2, index);
+ }
+ else
+ {
+ InsertIntSse2(4);
+ }
+ }
+ else if (src2.Type == OperandType.FP32)
+ {
+ Debug.Assert(index < 4);
+
+ if (index != 0)
+ {
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Insertps(dest, src1, src2, (byte)(index << 4));
+ }
+ else
+ {
+ if (src1.GetRegister() == src2.GetRegister())
+ {
+ int mask = 0b11_10_01_00;
+
+ mask &= ~(0b11 << index * 2);
+
+ context.Assembler.Pshufd(dest, src1, (byte)mask);
+ }
+ else
+ {
+ int mask0 = 0b11_10_01_00;
+ int mask1 = 0b11_10_01_00;
+
+ mask0 = BitUtils.RotateRight(mask0, index * 2, 8);
+ mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8);
+
+ context.Assembler.Pshufd(src1, src1, (byte)mask0); // Lane to be inserted in position 0.
+ context.Assembler.Movss (dest, src1, src2); // dest[127:0] = src1[127:32] | src2[31:0]
+ context.Assembler.Pshufd(dest, dest, (byte)mask1); // Inserted lane in original position.
+
+ if (dest.GetRegister() != src1.GetRegister())
+ {
+ context.Assembler.Pshufd(src1, src1, (byte)mask1); // Restore src1.
+ }
+ }
+ }
+ }
+ else
+ {
+ context.Assembler.Movss(dest, src1, src2);
+ }
+ }
+ else /* if (src2.Type == OperandType.FP64) */
+ {
+ Debug.Assert(index < 2);
+
+ if (index != 0)
+ {
+ context.Assembler.Movlhps(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.Movsd(dest, src1, src2);
+ }
+ }
+ }
+
+ private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ context.Assembler.Pinsrw(dest, src1, src2, index);
+ }
+
+ private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+            // Without SSE 4.1 support, this instruction cannot be emulated here
+            // without a temporary register, so the pre-allocator handles that case
+            // instead when SSE 4.1 is not supported by the CPU.
+ Debug.Assert(HardwareCapabilities.SupportsSse41);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ context.Assembler.Pinsrb(dest, src1, src2, index);
+ }
+
+ private static void GenerateVectorOne(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ Debug.Assert(!dest.Type.IsInteger());
+
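+            // Comparing a register with itself using PCMPEQW makes every lane
+            // compare equal, filling the destination with all ones.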
+ context.Assembler.Pcmpeqw(dest, dest, dest);
+ }
+
+ private static void GenerateVectorZero(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ context.Assembler.Xorps(dest, dest, dest);
+ }
+
+ private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
+ GenerateZeroUpper64(context, dest, source);
+ }
+
+ private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
+ GenerateZeroUpper96(context, dest, source);
+ }
+
+ private static void GenerateZeroExtend16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movzx16(dest, source, OperandType.I32);
+ }
+
+ private static void GenerateZeroExtend32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ // We can eliminate the move if source is already 32-bit and the registers are the same.
+ if (dest.Value == source.Value && source.Type == OperandType.I32)
+ {
+ return;
+ }
+
+ context.Assembler.Mov(dest, source, OperandType.I32);
+ }
+
+ private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movzx8(dest, source, OperandType.I32);
+ }
+
+ private static void GenerateLoad(CodeGenContext context, Operand address, Operand value)
+ {
+ switch (value.Type)
+ {
+ case OperandType.I32: context.Assembler.Mov (value, address, OperandType.I32); break;
+ case OperandType.I64: context.Assembler.Mov (value, address, OperandType.I64); break;
+ case OperandType.FP32: context.Assembler.Movd (value, address); break;
+ case OperandType.FP64: context.Assembler.Movq (value, address); break;
+ case OperandType.V128: context.Assembler.Movdqu(value, address); break;
+
+ default: Debug.Assert(false); break;
+ }
+ }
+
+ private static void GenerateStore(CodeGenContext context, Operand address, Operand value)
+ {
+ switch (value.Type)
+ {
+ case OperandType.I32: context.Assembler.Mov (address, value, OperandType.I32); break;
+ case OperandType.I64: context.Assembler.Mov (address, value, OperandType.I64); break;
+ case OperandType.FP32: context.Assembler.Movd (address, value); break;
+ case OperandType.FP64: context.Assembler.Movq (address, value); break;
+ case OperandType.V128: context.Assembler.Movdqu(address, value); break;
+
+ default: Debug.Assert(false); break;
+ }
+ }
+
+ private static void GenerateZeroUpper64(CodeGenContext context, Operand dest, Operand source)
+ {
+ context.Assembler.Movq(dest, source);
+ }
+
+ private static void GenerateZeroUpper96(CodeGenContext context, Operand dest, Operand source)
+ {
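+            // MOVQ clears the upper 64 bits; PSHUFD with 0b11_11_11_00 then keeps
+            // lane 0 and fills lanes 1-3 from the now-zero top lane, clearing the
+            // upper 96 bits in total.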
+ context.Assembler.Movq(dest, source);
+ context.Assembler.Pshufd(dest, dest, 0xfc);
+ }
+
+ private static bool MatchOperation(Operation node, Instruction inst, OperandType destType, Register destReg)
+ {
+ if (node == default || node.DestinationsCount == 0)
+ {
+ return false;
+ }
+
+ if (node.Instruction != inst)
+ {
+ return false;
+ }
+
+ Operand dest = node.Destination;
+
+ return dest.Kind == OperandKind.Register &&
+ dest.Type == destType &&
+ dest.GetRegister() == destReg;
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateUnOp(Operand dest, Operand source)
+ {
+ EnsureSameReg (dest, source);
+ EnsureSameType(dest, source);
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+ {
+ EnsureSameReg (dest, src1);
+ EnsureSameType(dest, src1, src2);
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+ {
+ EnsureSameReg (dest, src1);
+ EnsureSameType(dest, src1);
+
+ Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
+ }
+
+ private static void EnsureSameReg(Operand op1, Operand op2)
+ {
+ if (!op1.Type.IsInteger() && HardwareCapabilities.SupportsVexEncoding)
+ {
+ return;
+ }
+
+ Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
+ Debug.Assert(op1.Kind == op2.Kind);
+ Debug.Assert(op1.Value == op2.Value);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ Debug.Assert(op1.Type == op3.Type);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ Debug.Assert(op1.Type == op3.Type);
+ Debug.Assert(op1.Type == op4.Type);
+ }
+
+ private static UnwindInfo WritePrologue(CodeGenContext context)
+ {
+ List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>();
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
+ while (mask != 0)
+ {
+ int bit = BitOperations.TrailingZeroCount(mask);
+
+ context.Assembler.Push(Register((X86Register)bit));
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: bit));
+
+ mask &= ~(1 << bit);
+ }
+
+ int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
+
+ reservedStackSize += context.XmmSaveRegionSize;
+
+ if (reservedStackSize >= StackGuardSize)
+ {
+ GenerateInlineStackProbe(context, reservedStackSize);
+ }
+
+ if (reservedStackSize != 0)
+ {
+ context.Assembler.Sub(rsp, Const(reservedStackSize), OperandType.I64);
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.AllocStack, context.StreamOffset, stackOffsetOrAllocSize: reservedStackSize));
+ }
+
+ int offset = reservedStackSize;
+
+ mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ while (mask != 0)
+ {
+ int bit = BitOperations.TrailingZeroCount(mask);
+
+ offset -= 16;
+
+ Operand memOp = MemoryOp(OperandType.V128, rsp, default, Multiplier.x1, offset);
+
+ context.Assembler.Movdqu(memOp, Xmm((X86Register)bit));
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.SaveXmm128, context.StreamOffset, bit, offset));
+
+ mask &= ~(1 << bit);
+ }
+
+ return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset);
+ }
+
+ private static void WriteEpilogue(CodeGenContext context)
+ {
+ Operand rsp = Register(X86Register.Rsp);
+
+ int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
+
+ reservedStackSize += context.XmmSaveRegionSize;
+
+ int offset = reservedStackSize;
+
+ int mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ while (mask != 0)
+ {
+ int bit = BitOperations.TrailingZeroCount(mask);
+
+ offset -= 16;
+
+ Operand memOp = MemoryOp(OperandType.V128, rsp, default, Multiplier.x1, offset);
+
+ context.Assembler.Movdqu(Xmm((X86Register)bit), memOp);
+
+ mask &= ~(1 << bit);
+ }
+
+ if (reservedStackSize != 0)
+ {
+ context.Assembler.Add(rsp, Const(reservedStackSize), OperandType.I64);
+ }
+
+ mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
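+            // The prologue pushed callee-saved registers from the lowest index up,
+            // so pop them from the highest index down to restore them in reverse.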
+ while (mask != 0)
+ {
+ int bit = BitUtils.HighestBitSet(mask);
+
+ context.Assembler.Pop(Register((X86Register)bit));
+
+ mask &= ~(1 << bit);
+ }
+ }
+
+ private static void GenerateInlineStackProbe(CodeGenContext context, int size)
+ {
+            // Windows allocates stack pages lazily, and there are only 2 guard pages
+            // at the end of the stack. So, if we reserve more than the guard region
+            // in one allocation, we must ensure the OS maps every page we are going
+            // to use. We do that with a dummy read on each of those pages, forcing a
+            // page fault so the OS maps them; if a page is already mapped, the read
+            // has no effect.
+ const int pageMask = PageSize - 1;
+
+ size = (size + pageMask) & ~pageMask;
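+
+            // For example, assuming 4 KiB pages, a 12 KiB reservation probes
+            // [rsp - 0x1000] and [rsp - 0x2000] below.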
+
+ Operand rsp = Register(X86Register.Rsp);
+ Operand temp = Register(CallingConvention.GetIntReturnRegister());
+
+ for (int offset = PageSize; offset < size; offset += PageSize)
+ {
+ Operand memOp = MemoryOp(OperandType.I32, rsp, default, Multiplier.x1, -offset);
+
+ context.Assembler.Mov(temp, memOp, OperandType.I32);
+ }
+ }
+
+ private static Operand Memory(Operand operand, OperandType type)
+ {
+ if (operand.Kind == OperandKind.Memory)
+ {
+ return operand;
+ }
+
+ return MemoryOp(type, operand);
+ }
+
+ private static Operand Register(X86Register register, OperandType type = OperandType.I64)
+ {
+ return Operand.Factory.Register((int)register, RegisterType.Integer, type);
+ }
+
+ private static Operand Xmm(X86Register register)
+ {
+ return Operand.Factory.Register((int)register, RegisterType.Vector, OperandType.V128);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
new file mode 100644
index 00000000..07cdcd09
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
@@ -0,0 +1,144 @@
+using Ryujinx.Memory;
+using System;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class HardwareCapabilities
+ {
+ private delegate uint GetXcr0();
+
+ static HardwareCapabilities()
+ {
+ if (!X86Base.IsSupported)
+ {
+ return;
+ }
+
+ (int maxNum, _, _, _) = X86Base.CpuId(0x00000000, 0x00000000);
+
+ (_, _, int ecx1, int edx1) = X86Base.CpuId(0x00000001, 0x00000000);
+ FeatureInfo1Edx = (FeatureFlags1Edx)edx1;
+ FeatureInfo1Ecx = (FeatureFlags1Ecx)ecx1;
+
+ if (maxNum >= 7)
+ {
+ (_, int ebx7, int ecx7, _) = X86Base.CpuId(0x00000007, 0x00000000);
+ FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
+ FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7;
+ }
+
+ Xcr0InfoEax = (Xcr0FlagsEax)GetXcr0Eax();
+ }
+
+ private static uint GetXcr0Eax()
+ {
+ if (!FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Xsave))
+ {
+ // XSAVE feature required for xgetbv
+ return 0;
+ }
+
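+            // Read XCR0 by emitting a tiny executable stub (xor ecx, ecx; xgetbv;
+            // ret) and calling it through a delegate.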
+ ReadOnlySpan<byte> asmGetXcr0 = new byte[]
+ {
+ 0x31, 0xc9, // xor ecx, ecx
+ 0xf, 0x01, 0xd0, // xgetbv
+ 0xc3, // ret
+ };
+
+ using MemoryBlock memGetXcr0 = new MemoryBlock((ulong)asmGetXcr0.Length);
+
+ memGetXcr0.Write(0, asmGetXcr0);
+
+ memGetXcr0.Reprotect(0, (ulong)asmGetXcr0.Length, MemoryPermission.ReadAndExecute);
+
+ var fGetXcr0 = Marshal.GetDelegateForFunctionPointer<GetXcr0>(memGetXcr0.Pointer);
+
+ return fGetXcr0();
+ }
+
+ [Flags]
+ public enum FeatureFlags1Edx
+ {
+ Sse = 1 << 25,
+ Sse2 = 1 << 26
+ }
+
+ [Flags]
+ public enum FeatureFlags1Ecx
+ {
+ Sse3 = 1 << 0,
+ Pclmulqdq = 1 << 1,
+ Ssse3 = 1 << 9,
+ Fma = 1 << 12,
+ Sse41 = 1 << 19,
+ Sse42 = 1 << 20,
+ Popcnt = 1 << 23,
+ Aes = 1 << 25,
+ Xsave = 1 << 26,
+ Osxsave = 1 << 27,
+ Avx = 1 << 28,
+ F16c = 1 << 29
+ }
+
+ [Flags]
+ public enum FeatureFlags7Ebx
+ {
+ Avx2 = 1 << 5,
+ Avx512f = 1 << 16,
+ Avx512dq = 1 << 17,
+ Sha = 1 << 29,
+ Avx512bw = 1 << 30,
+ Avx512vl = 1 << 31
+ }
+
+ [Flags]
+ public enum FeatureFlags7Ecx
+ {
+ Gfni = 1 << 8,
+ }
+
+ [Flags]
+ public enum Xcr0FlagsEax
+ {
+ Sse = 1 << 1,
+ YmmHi128 = 1 << 2,
+ Opmask = 1 << 5,
+ ZmmHi256 = 1 << 6,
+ Hi16Zmm = 1 << 7
+ }
+
+ public static FeatureFlags1Edx FeatureInfo1Edx { get; }
+ public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
+ public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
+ public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0;
+ public static Xcr0FlagsEax Xcr0InfoEax { get; } = 0;
+
+ public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
+ public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
+ public static bool SupportsSse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse3);
+ public static bool SupportsPclmulqdq => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Pclmulqdq);
+ public static bool SupportsSsse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Ssse3);
+ public static bool SupportsFma => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Fma);
+ public static bool SupportsSse41 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse41);
+ public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42);
+ public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt);
+ public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes);
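+        // AVX additionally requires OS support: OSXSAVE must be set and XCR0 must
+        // indicate that the OS preserves both the SSE and upper-YMM register state.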
+ public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx | FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave) && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128);
+ public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;
+ public static bool SupportsAvx512F => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512f) && FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave)
+ && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128 | Xcr0FlagsEax.Opmask | Xcr0FlagsEax.ZmmHi256 | Xcr0FlagsEax.Hi16Zmm);
+ public static bool SupportsAvx512Vl => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512vl) && SupportsAvx512F;
+ public static bool SupportsAvx512Bw => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512bw) && SupportsAvx512F;
+ public static bool SupportsAvx512Dq => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512dq) && SupportsAvx512F;
+ public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
+ public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
+ public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni);
+
+ public static bool ForceLegacySse { get; set; }
+
+ public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse;
+ public static bool SupportsEvexEncoding => SupportsAvx512F && !ForceLegacySse;
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs b/src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
new file mode 100644
index 00000000..302bf4d3
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ readonly struct IntrinsicInfo
+ {
+ public X86Instruction Inst { get; }
+ public IntrinsicType Type { get; }
+
+ public IntrinsicInfo(X86Instruction inst, IntrinsicType type)
+ {
+ Inst = inst;
+ Type = type;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
new file mode 100644
index 00000000..e3d94b7a
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -0,0 +1,200 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class IntrinsicTable
+ {
+ private static IntrinsicInfo[] _intrinTable;
+
+ static IntrinsicTable()
+ {
+ _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
+
+ Add(Intrinsic.X86Addpd, new IntrinsicInfo(X86Instruction.Addpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Addps, new IntrinsicInfo(X86Instruction.Addps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Addsd, new IntrinsicInfo(X86Instruction.Addsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Addss, new IntrinsicInfo(X86Instruction.Addss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesdec, new IntrinsicInfo(X86Instruction.Aesdec, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesdeclast, new IntrinsicInfo(X86Instruction.Aesdeclast, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesenc, new IntrinsicInfo(X86Instruction.Aesenc, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesenclast, new IntrinsicInfo(X86Instruction.Aesenclast, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesimc, new IntrinsicInfo(X86Instruction.Aesimc, IntrinsicType.Unary));
+ Add(Intrinsic.X86Andnpd, new IntrinsicInfo(X86Instruction.Andnpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Andnps, new IntrinsicInfo(X86Instruction.Andnps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Andpd, new IntrinsicInfo(X86Instruction.Andpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Andps, new IntrinsicInfo(X86Instruction.Andps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Blendvpd, new IntrinsicInfo(X86Instruction.Blendvpd, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Blendvps, new IntrinsicInfo(X86Instruction.Blendvps, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Cmppd, new IntrinsicInfo(X86Instruction.Cmppd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Cmpps, new IntrinsicInfo(X86Instruction.Cmpps, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Cmpsd, new IntrinsicInfo(X86Instruction.Cmpsd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Cmpss, new IntrinsicInfo(X86Instruction.Cmpss, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Comisdeq, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisdge, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisdlt, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisseq, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comissge, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisslt, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Crc32, new IntrinsicInfo(X86Instruction.Crc32, IntrinsicType.Crc32));
+ Add(Intrinsic.X86Crc32_16, new IntrinsicInfo(X86Instruction.Crc32_16, IntrinsicType.Crc32));
+ Add(Intrinsic.X86Crc32_8, new IntrinsicInfo(X86Instruction.Crc32_8, IntrinsicType.Crc32));
+ Add(Intrinsic.X86Cvtdq2pd, new IntrinsicInfo(X86Instruction.Cvtdq2pd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtdq2ps, new IntrinsicInfo(X86Instruction.Cvtdq2ps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtpd2dq, new IntrinsicInfo(X86Instruction.Cvtpd2dq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtpd2ps, new IntrinsicInfo(X86Instruction.Cvtpd2ps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtps2dq, new IntrinsicInfo(X86Instruction.Cvtps2dq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtps2pd, new IntrinsicInfo(X86Instruction.Cvtps2pd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtsd2si, new IntrinsicInfo(X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr));
+ Add(Intrinsic.X86Cvtsd2ss, new IntrinsicInfo(X86Instruction.Cvtsd2ss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Cvtsi2sd, new IntrinsicInfo(X86Instruction.Cvtsi2sd, IntrinsicType.BinaryGpr));
+ Add(Intrinsic.X86Cvtsi2si, new IntrinsicInfo(X86Instruction.Movd, IntrinsicType.UnaryToGpr));
+ Add(Intrinsic.X86Cvtsi2ss, new IntrinsicInfo(X86Instruction.Cvtsi2ss, IntrinsicType.BinaryGpr));
+ Add(Intrinsic.X86Cvtss2sd, new IntrinsicInfo(X86Instruction.Cvtss2sd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Cvtss2si, new IntrinsicInfo(X86Instruction.Cvtss2si, IntrinsicType.UnaryToGpr));
+ Add(Intrinsic.X86Divpd, new IntrinsicInfo(X86Instruction.Divpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divps, new IntrinsicInfo(X86Instruction.Divps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divsd, new IntrinsicInfo(X86Instruction.Divsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divss, new IntrinsicInfo(X86Instruction.Divss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Gf2p8affineqb, new IntrinsicInfo(X86Instruction.Gf2p8affineqb, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Haddpd, new IntrinsicInfo(X86Instruction.Haddpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Haddps, new IntrinsicInfo(X86Instruction.Haddps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Insertps, new IntrinsicInfo(X86Instruction.Insertps, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Ldmxcsr, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr));
+ Add(Intrinsic.X86Maxpd, new IntrinsicInfo(X86Instruction.Maxpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxps, new IntrinsicInfo(X86Instruction.Maxps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxsd, new IntrinsicInfo(X86Instruction.Maxsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxss, new IntrinsicInfo(X86Instruction.Maxss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minpd, new IntrinsicInfo(X86Instruction.Minpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minps, new IntrinsicInfo(X86Instruction.Minps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minsd, new IntrinsicInfo(X86Instruction.Minsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minss, new IntrinsicInfo(X86Instruction.Minss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Movhlps, new IntrinsicInfo(X86Instruction.Movhlps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Movlhps, new IntrinsicInfo(X86Instruction.Movlhps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Movss, new IntrinsicInfo(X86Instruction.Movss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulpd, new IntrinsicInfo(X86Instruction.Mulpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Palignr, new IntrinsicInfo(X86Instruction.Palignr, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pavgw, new IntrinsicInfo(X86Instruction.Pavgw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pblendvb, new IntrinsicInfo(X86Instruction.Pblendvb, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Pclmulqdq, new IntrinsicInfo(X86Instruction.Pclmulqdq, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Pcmpeqb, new IntrinsicInfo(X86Instruction.Pcmpeqb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpeqd, new IntrinsicInfo(X86Instruction.Pcmpeqd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpeqq, new IntrinsicInfo(X86Instruction.Pcmpeqq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpeqw, new IntrinsicInfo(X86Instruction.Pcmpeqw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtb, new IntrinsicInfo(X86Instruction.Pcmpgtb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtd, new IntrinsicInfo(X86Instruction.Pcmpgtd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtq, new IntrinsicInfo(X86Instruction.Pcmpgtq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtw, new IntrinsicInfo(X86Instruction.Pcmpgtw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxsb, new IntrinsicInfo(X86Instruction.Pmaxsb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxsd, new IntrinsicInfo(X86Instruction.Pmaxsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxsw, new IntrinsicInfo(X86Instruction.Pmaxsw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxub, new IntrinsicInfo(X86Instruction.Pmaxub, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxud, new IntrinsicInfo(X86Instruction.Pmaxud, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxuw, new IntrinsicInfo(X86Instruction.Pmaxuw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminsb, new IntrinsicInfo(X86Instruction.Pminsb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminsd, new IntrinsicInfo(X86Instruction.Pminsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminsw, new IntrinsicInfo(X86Instruction.Pminsw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminub, new IntrinsicInfo(X86Instruction.Pminub, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminud, new IntrinsicInfo(X86Instruction.Pminud, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminuw, new IntrinsicInfo(X86Instruction.Pminuw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmovsxbw, new IntrinsicInfo(X86Instruction.Pmovsxbw, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovsxdq, new IntrinsicInfo(X86Instruction.Pmovsxdq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovsxwd, new IntrinsicInfo(X86Instruction.Pmovsxwd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovzxbw, new IntrinsicInfo(X86Instruction.Pmovzxbw, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovzxdq, new IntrinsicInfo(X86Instruction.Pmovzxdq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovzxwd, new IntrinsicInfo(X86Instruction.Pmovzxwd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmulld, new IntrinsicInfo(X86Instruction.Pmulld, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmullw, new IntrinsicInfo(X86Instruction.Pmullw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Popcnt, new IntrinsicInfo(X86Instruction.Popcnt, IntrinsicType.PopCount));
+ Add(Intrinsic.X86Por, new IntrinsicInfo(X86Instruction.Por, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pshufb, new IntrinsicInfo(X86Instruction.Pshufb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pshufd, new IntrinsicInfo(X86Instruction.Pshufd, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Pslld, new IntrinsicInfo(X86Instruction.Pslld, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pslldq, new IntrinsicInfo(X86Instruction.Pslldq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psllq, new IntrinsicInfo(X86Instruction.Psllq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psllw, new IntrinsicInfo(X86Instruction.Psllw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrad, new IntrinsicInfo(X86Instruction.Psrad, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psraw, new IntrinsicInfo(X86Instruction.Psraw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrld, new IntrinsicInfo(X86Instruction.Psrld, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrlq, new IntrinsicInfo(X86Instruction.Psrlq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrldq, new IntrinsicInfo(X86Instruction.Psrldq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrlw, new IntrinsicInfo(X86Instruction.Psrlw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubb, new IntrinsicInfo(X86Instruction.Psubb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubd, new IntrinsicInfo(X86Instruction.Psubd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubq, new IntrinsicInfo(X86Instruction.Psubq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubw, new IntrinsicInfo(X86Instruction.Psubw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhbw, new IntrinsicInfo(X86Instruction.Punpckhbw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhdq, new IntrinsicInfo(X86Instruction.Punpckhdq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhqdq, new IntrinsicInfo(X86Instruction.Punpckhqdq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhwd, new IntrinsicInfo(X86Instruction.Punpckhwd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpcklbw, new IntrinsicInfo(X86Instruction.Punpcklbw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckldq, new IntrinsicInfo(X86Instruction.Punpckldq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpcklqdq, new IntrinsicInfo(X86Instruction.Punpcklqdq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpcklwd, new IntrinsicInfo(X86Instruction.Punpcklwd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pxor, new IntrinsicInfo(X86Instruction.Pxor, IntrinsicType.Binary));
+ Add(Intrinsic.X86Rcpps, new IntrinsicInfo(X86Instruction.Rcpps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Rcpss, new IntrinsicInfo(X86Instruction.Rcpss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Roundpd, new IntrinsicInfo(X86Instruction.Roundpd, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Roundps, new IntrinsicInfo(X86Instruction.Roundps, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Roundsd, new IntrinsicInfo(X86Instruction.Roundsd, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sha256Msg1, new IntrinsicInfo(X86Instruction.Sha256Msg1, IntrinsicType.Binary));
+ Add(Intrinsic.X86Sha256Msg2, new IntrinsicInfo(X86Instruction.Sha256Msg2, IntrinsicType.Binary));
+ Add(Intrinsic.X86Sha256Rnds2, new IntrinsicInfo(X86Instruction.Sha256Rnds2, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sqrtps, new IntrinsicInfo(X86Instruction.Sqrtps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sqrtsd, new IntrinsicInfo(X86Instruction.Sqrtsd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sqrtss, new IntrinsicInfo(X86Instruction.Sqrtss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Stmxcsr, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr));
+ Add(Intrinsic.X86Subpd, new IntrinsicInfo(X86Instruction.Subpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Subps, new IntrinsicInfo(X86Instruction.Subps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Subsd, new IntrinsicInfo(X86Instruction.Subsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Subss, new IntrinsicInfo(X86Instruction.Subss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpckhpd, new IntrinsicInfo(X86Instruction.Unpckhpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpckhps, new IntrinsicInfo(X86Instruction.Unpckhps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpcklpd, new IntrinsicInfo(X86Instruction.Unpcklpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Vfmadd231pd, new IntrinsicInfo(X86Instruction.Vfmadd231pd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmadd231ps, new IntrinsicInfo(X86Instruction.Vfmadd231ps, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmadd231sd, new IntrinsicInfo(X86Instruction.Vfmadd231sd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmadd231ss, new IntrinsicInfo(X86Instruction.Vfmadd231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231pd, new IntrinsicInfo(X86Instruction.Vfnmadd231pd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231ps, new IntrinsicInfo(X86Instruction.Vfnmadd231ps, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231sd, new IntrinsicInfo(X86Instruction.Vfnmadd231sd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vpternlogd, new IntrinsicInfo(X86Instruction.Vpternlogd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary));
+ }
+
+ private static void Add(Intrinsic intrin, IntrinsicInfo info)
+ {
+ _intrinTable[(int)intrin] = info;
+ }
+
+ public static IntrinsicInfo GetInfo(Intrinsic intrin)
+ {
+ return _intrinTable[(int)intrin];
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/IntrinsicType.cs b/src/ARMeilleure/CodeGen/X86/IntrinsicType.cs
new file mode 100644
index 00000000..5a9c14af
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/IntrinsicType.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum IntrinsicType
+ {
+ Comis_,
+ Mxcsr,
+ PopCount,
+ Unary,
+ UnaryToGpr,
+ Binary,
+ BinaryGpr,
+ BinaryImm,
+ Crc32,
+ Ternary,
+ TernaryImm,
+ Fma
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/Mxcsr.cs b/src/ARMeilleure/CodeGen/X86/Mxcsr.cs
new file mode 100644
index 00000000..c61eac31
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/Mxcsr.cs
@@ -0,0 +1,15 @@
+using System;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ [Flags]
+ enum Mxcsr
+ {
+ Ftz = 1 << 15, // Flush To Zero.
+ Rhi = 1 << 14, // Round Mode high bit.
+ Rlo = 1 << 13, // Round Mode low bit.
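+ // Together, Rhi:Rlo form the MXCSR rounding-control field:
+ // 00 = round to nearest, 01 = round down, 10 = round up, 11 = round toward zero.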
+ Um = 1 << 11, // Underflow Mask.
+ Dm = 1 << 8, // Denormal Mask.
+ Daz = 1 << 6 // Denormals Are Zero.
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/PreAllocator.cs b/src/ARMeilleure/CodeGen/X86/PreAllocator.cs
new file mode 100644
index 00000000..cb742d67
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/PreAllocator.cs
@@ -0,0 +1,796 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class PreAllocator
+ {
+ public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
+ {
+ maxCallArgs = -1;
+
+ Span<Operation> buffer = default;
+
+ CallConvName callConv = CallingConvention.GetCurrentCallConv();
+
+ Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+
+ for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ if (node.Instruction == Instruction.Phi)
+ {
+ continue;
+ }
+
+ InsertConstantRegCopies(block.Operations, node);
+ InsertDestructiveRegCopies(block.Operations, node);
+ InsertConstrainedRegCopies(block.Operations, node);
+
+ switch (node.Instruction)
+ {
+ case Instruction.Call:
+ // Get the maximum number of arguments used on a call.
+ // On Windows, when a struct is returned from the call,
+ // we also need to pass the pointer where the struct
+ // should be written as the first argument.
+ int argsCount = node.SourcesCount - 1;
+
+ if (node.Destination != default && node.Destination.Type == OperandType.V128)
+ {
+ argsCount++;
+ }
+
+ if (maxCallArgs < argsCount)
+ {
+ maxCallArgs = argsCount;
+ }
+
+ // Copy values to registers expected by the function
+ // being called, as mandated by the ABI.
+ if (callConv == CallConvName.Windows)
+ {
+ PreAllocatorWindows.InsertCallCopies(block.Operations, stackAlloc, node);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ PreAllocatorSystemV.InsertCallCopies(block.Operations, node);
+ }
+ break;
+
+ case Instruction.ConvertToFPUI:
+ GenerateConvertToFPUI(block.Operations, node);
+ break;
+
+ case Instruction.LoadArgument:
+ if (callConv == CallConvName.Windows)
+ {
+ nextNode = PreAllocatorWindows.InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ nextNode = PreAllocatorSystemV.InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node);
+ }
+ break;
+
+ case Instruction.Negate:
+ if (!node.GetSource(0).Type.IsInteger())
+ {
+ GenerateNegate(block.Operations, node);
+ }
+ break;
+
+ case Instruction.Return:
+ if (callConv == CallConvName.Windows)
+ {
+ PreAllocatorWindows.InsertReturnCopy(cctx, block.Operations, preservedArgs, node);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ PreAllocatorSystemV.InsertReturnCopy(block.Operations, node);
+ }
+ break;
+
+ case Instruction.Tailcall:
+ if (callConv == CallConvName.Windows)
+ {
+ PreAllocatorWindows.InsertTailcallCopies(block.Operations, stackAlloc, node);
+ }
+ else
+ {
+ PreAllocatorSystemV.InsertTailcallCopies(block.Operations, stackAlloc, node);
+ }
+ break;
+
+ case Instruction.VectorInsert8:
+ if (!HardwareCapabilities.SupportsSse41)
+ {
+ GenerateVectorInsert8(block.Operations, node);
+ }
+ break;
+
+ case Instruction.Extended:
+ if (node.Intrinsic == Intrinsic.X86Ldmxcsr)
+ {
+ int stackOffset = stackAlloc.Allocate(OperandType.I32);
+
+ node.SetSources(new Operand[] { Const(stackOffset), node.GetSource(0) });
+ }
+ else if (node.Intrinsic == Intrinsic.X86Stmxcsr)
+ {
+ int stackOffset = stackAlloc.Allocate(OperandType.I32);
+
+ node.SetSources(new Operand[] { Const(stackOffset) });
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ protected static void InsertConstantRegCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0 || IsXmmIntrinsic(node))
+ {
+ return;
+ }
+
+ Instruction inst = node.Instruction;
+
+ Operand src1 = node.GetSource(0);
+ Operand src2;
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ if (!src1.Type.IsInteger())
+ {
+ // Handle non-integer types (FP32, FP64 and V128).
+ // For instructions without an immediate operand, we do the following:
+ // - Insert a copy with the constant value (as integer) to a GPR.
+ // - Insert a copy from the GPR to an XMM register.
+ // - Replace the constant use with the XMM register.
+ src1 = AddXmmCopy(nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ else if (!HasConstSrc1(inst))
+ {
+ // Handle integer types.
+ // Most ALU instructions accept a 32-bit immediate on the second operand.
+ // We need to ensure the following:
+ // - If the constant is on operand 1, we need to move it.
+ // -- But first, we try to swap operand 1 and 2 if the instruction is commutative.
+ // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+ // - If the constant is on operand 2, we check if the instruction supports it;
+ // if not, we also add a copy. 64-bit constants are usually not supported.
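+ // For example, "add dest, 0x10, x" on the commutative Add becomes
+ // "add dest, x, 0x10", letting the constant be encoded as an immediate.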
+ if (IsCommutative(node))
+ {
+ src2 = node.GetSource(1);
+
+ Operand temp = src1;
+
+ src1 = src2;
+ src2 = temp;
+
+ node.SetSource(0, src1);
+ node.SetSource(1, src2);
+ }
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ src1 = AddCopy(nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ }
+ }
+
+ if (node.SourcesCount < 2)
+ {
+ return;
+ }
+
+ src2 = node.GetSource(1);
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ if (!src2.Type.IsInteger())
+ {
+ src2 = AddXmmCopy(nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ else if (!HasConstSrc2(inst) || CodeGenCommon.IsLongConst(src2))
+ {
+ src2 = AddCopy(nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ }
+ }
+
+ protected static void InsertConstrainedRegCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operand dest = node.Destination;
+
+ switch (node.Instruction)
+ {
+ case Instruction.CompareAndSwap:
+ case Instruction.CompareAndSwap16:
+ case Instruction.CompareAndSwap8:
+ {
+ OperandType type = node.GetSource(1).Type;
+
+ if (type == OperandType.V128)
+ {
+ // Handle the many restrictions of the compare and exchange (16 bytes) instruction:
+ // - The expected value should be in RDX:RAX.
+ // - The new value to be written should be in RCX:RBX.
+ // - The value at the memory location is loaded to RDX:RAX.
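+ // (These mirror the CMPXCHG16B semantics: RDX:RAX is compared against the
+ // 16 bytes at the memory operand; if equal, RCX:RBX is stored there,
+ // otherwise the memory value is loaded into RDX:RAX.)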
+ void SplitOperand(Operand source, Operand lr, Operand hr)
+ {
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, lr, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, hr, source, Const(1)));
+ }
+
+ Operand rax = Gpr(X86Register.Rax, OperandType.I64);
+ Operand rbx = Gpr(X86Register.Rbx, OperandType.I64);
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
+ Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
+
+ SplitOperand(node.GetSource(1), rax, rdx);
+ SplitOperand(node.GetSource(2), rbx, rcx);
+
+ Operation operation = node;
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, rax));
+ nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1)));
+
+ operation.SetDestinations(new Operand[] { rdx, rax });
+ operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx });
+ }
+ else
+ {
+ // Handle the many restrictions of the compare and exchange (32/64) instruction:
+ // - The expected value should be in (E/R)AX.
+ // - The value at the memory location is loaded to (E/R)AX.
+ Operand expected = node.GetSource(1);
+ Operand newValue = node.GetSource(2);
+
+ Operand rax = Gpr(X86Register.Rax, expected.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, rax, expected));
+
+ // We need to store the new value into a temp, since it may
+ // be a constant, and this instruction does not support immediate operands.
+ Operand temp = Local(newValue.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, newValue));
+
+ node.SetSources(new Operand[] { node.GetSource(0), rax, temp });
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, rax));
+
+ node.Destination = rax;
+ }
+
+ break;
+ }
+
+ case Instruction.Divide:
+ case Instruction.DivideUI:
+ {
+ // Handle the many restrictions of the division instructions:
+ // - The dividend is always in RDX:RAX.
+ // - The result is always in RAX.
+ // - Additionally, the remainder is written to RDX.
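+ // For example, a 64-bit DIV computes RDX:RAX / src, leaving the
+ // quotient in RAX and the remainder in RDX.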
+ if (dest.Type.IsInteger())
+ {
+ Operand src1 = node.GetSource(0);
+
+ Operand rax = Gpr(X86Register.Rax, src1.Type);
+ Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, rax, src1));
+ nodes.AddBefore(node, Operation(Instruction.Clobber, rdx));
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, rax));
+
+ node.SetSources(new Operand[] { rdx, rax, node.GetSource(1) });
+ node.Destination = rax;
+ }
+
+ break;
+ }
+
+ case Instruction.Extended:
+ {
+ bool isBlend = node.Intrinsic == Intrinsic.X86Blendvpd ||
+ node.Intrinsic == Intrinsic.X86Blendvps ||
+ node.Intrinsic == Intrinsic.X86Pblendvb;
+
+ // The last operand of BLENDVPD, BLENDVPS and PBLENDVB is always implied to be XMM0 when VEX is not supported.
+ // SHA256RNDS2 always has XMM0 as an implied last operand.
+ if ((isBlend && !HardwareCapabilities.SupportsVexEncoding) || node.Intrinsic == Intrinsic.X86Sha256Rnds2)
+ {
+ Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, xmm0, node.GetSource(2)));
+
+ node.SetSource(2, xmm0);
+ }
+
+ break;
+ }
+
+ case Instruction.Multiply64HighSI:
+ case Instruction.Multiply64HighUI:
+ {
+ // Handle the many restrictions of the i64 * i64 = i128 multiply instructions:
+ // - The multiplicand is always in RAX.
+ // - The lower 64 bits of the result are always in RAX.
+ // - The higher 64 bits of the result are always in RDX.
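+ // (Single-operand MUL/IMUL compute RDX:RAX = RAX * src.)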
+ Operand src1 = node.GetSource(0);
+
+ Operand rax = Gpr(X86Register.Rax, src1.Type);
+ Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, rax, src1));
+
+ node.SetSource(0, rax);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, rdx));
+
+ node.SetDestinations(new Operand[] { rdx, rax });
+
+ break;
+ }
+
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ {
+ // The shift count register is always implied to be CL (the low 8 bits of RCX or ECX).
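+ // (Variable-count SHL/SHR/SAR/ROR only accept the count in CL.)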
+ if (node.GetSource(1).Kind == OperandKind.LocalVariable)
+ {
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I32);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, rcx, node.GetSource(1)));
+
+ node.SetSource(1, rcx);
+ }
+
+ break;
+ }
+ }
+ }
+
+ protected static void InsertDestructiveRegCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.Destination == default || node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Instruction inst = node.Instruction;
+
+ Operand dest = node.Destination;
+ Operand src1 = node.GetSource(0);
+
+ // The multiply instruction (which maps to IMUL) is somewhat special: it has
+ // a three-operand form where the second source is an immediate value.
+ bool threeOperandForm = inst == Instruction.Multiply && node.GetSource(1).Kind == OperandKind.Constant;
+
+ if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm)
+ {
+ bool useNewLocal = false;
+
+ for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ if (node.GetSource(srcIndex) == dest)
+ {
+ useNewLocal = true;
+
+ break;
+ }
+ }
+
+ if (useNewLocal)
+ {
+ // Dest is already being used as a source, so we need a new local
+ // to store the temporary value; otherwise the value in the dest
+ // local would be overwritten.
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1));
+
+ node.SetSource(0, temp);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
+
+ node.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1));
+
+ node.SetSource(0, dest);
+ }
+ }
+ else if (inst == Instruction.ConditionalSelect)
+ {
+ Operand src2 = node.GetSource(1);
+ Operand src3 = node.GetSource(2);
+
+ if (src1 == dest || src2 == dest)
+ {
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, src3));
+
+ node.SetSource(2, temp);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
+
+ node.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, Operation(Instruction.Copy, dest, src3));
+
+ node.SetSource(2, dest);
+ }
+ }
+ }
+
+ private static void GenerateConvertToFPUI(IntrusiveList<Operation> nodes, Operation node)
+ {
+ // Unsigned integer to FP conversions are not supported on X86.
+ // We need to turn them into signed integer to FP conversions, and
+ // adjust the final result.
+ Operand dest = node.Destination;
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(source.Type.IsInteger(), $"Invalid source type \"{source.Type}\".");
+
+ Operation currentNode = node;
+
+ if (source.Type == OperandType.I32)
+ {
+ // For 32-bit integers, we can just zero-extend to 64 bits,
+ // and then use the 64-bit signed conversion instructions.
+ Operand zex = Local(OperandType.I64);
+
+ node = nodes.AddAfter(node, Operation(Instruction.ZeroExtend32, zex, source));
+ node = nodes.AddAfter(node, Operation(Instruction.ConvertToFP, dest, zex));
+ }
+ else /* if (source.Type == OperandType.I64) */
+ {
+ // For 64-bit integers, we need to do the following:
+ // - Ensure that the integer has the most significant bit clear.
+ // -- This can be done by shifting the value right by 1, that is, dividing by 2.
+ // -- The least significant bit is lost in this case though.
+ // - We can then convert the shifted value with a signed integer instruction.
+ // - The result still needs to be corrected after that.
+ // -- First, we need to multiply the result by 2, as we divided it by 2 before.
+ // --- This can be done efficiently by adding the result to itself.
+ // -- Then, we need to add the least significant bit that was shifted out.
+ // --- We can convert the least significant bit to float, and add it to the result.
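+ // In effect: result = 2 * ToFP(x >> 1) + ToFP(x & 1), using signed conversions.
+ // For example, x = 0xFFFFFFFFFFFFFFFF gives 2 * ToFP(0x7FFFFFFFFFFFFFFF) + 1.0.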
+ Operand lsb = Local(OperandType.I64);
+ Operand half = Local(OperandType.I64);
+
+ Operand lsbF = Local(dest.Type);
+
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, lsb, source));
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, half, source));
+
+ node = nodes.AddAfter(node, Operation(Instruction.BitwiseAnd, lsb, lsb, Const(1L)));
+ node = nodes.AddAfter(node, Operation(Instruction.ShiftRightUI, half, half, Const(1)));
+
+ node = nodes.AddAfter(node, Operation(Instruction.ConvertToFP, lsbF, lsb));
+ node = nodes.AddAfter(node, Operation(Instruction.ConvertToFP, dest, half));
+
+ node = nodes.AddAfter(node, Operation(Instruction.Add, dest, dest, dest));
+ nodes.AddAfter(node, Operation(Instruction.Add, dest, dest, lsbF));
+ }
+
+ Delete(nodes, currentNode);
+ }
+
+ private static void GenerateNegate(IntrusiveList<Operation> nodes, Operation node)
+ {
+ // There's no SSE FP negate instruction, so we transform the negation into
+ // an XOR of the value to be negated with a mask that has only the sign bit set.
+ // This also produces -0 when negating the value 0.
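+ // For example, for FP32 the mask is 0x80000000, so 1.0f (0x3F800000)
+ // XOR mask = 0xBF800000 = -1.0f.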
+ Operand dest = node.Destination;
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 ||
+ dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\".");
+
+ Operation currentNode = node;
+
+ Operand res = Local(dest.Type);
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorOne, res));
+
+ if (dest.Type == OperandType.FP32)
+ {
+ node = nodes.AddAfter(node, Operation(Intrinsic.X86Pslld, res, res, Const(31)));
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ node = nodes.AddAfter(node, Operation(Intrinsic.X86Psllq, res, res, Const(63)));
+ }
+
+ node = nodes.AddAfter(node, Operation(Intrinsic.X86Xorps, res, res, source));
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, res));
+
+ Delete(nodes, currentNode);
+ }
+
+ private static void GenerateVectorInsert8(IntrusiveList<Operation> nodes, Operation node)
+ {
+ // Handle vector insertion when SSE4.1 is not supported.
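+ // Without SSE4.1's PINSRB, we emulate the byte insert by extracting the
+ // 16-bit lane that contains the byte, merging the new byte into the
+ // correct half of that lane, and inserting the lane back.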
+ Operand dest = node.Destination;
+ Operand src1 = node.GetSource(0); // Vector
+ Operand src2 = node.GetSource(1); // Value
+ Operand src3 = node.GetSource(2); // Index
+
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ Debug.Assert(index < 16);
+
+ Operation currentNode = node;
+
+ Operand temp1 = Local(OperandType.I32);
+ Operand temp2 = Local(OperandType.I32);
+
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, temp2, src2));
+
+ Operation vextOp = Operation(Instruction.VectorExtract16, temp1, src1, Const(index >> 1));
+
+ node = nodes.AddAfter(node, vextOp);
+
+ if ((index & 1) != 0)
+ {
+ node = nodes.AddAfter(node, Operation(Instruction.ZeroExtend8, temp1, temp1));
+ node = nodes.AddAfter(node, Operation(Instruction.ShiftLeft, temp2, temp2, Const(8)));
+ node = nodes.AddAfter(node, Operation(Instruction.BitwiseOr, temp1, temp1, temp2));
+ }
+ else
+ {
+ node = nodes.AddAfter(node, Operation(Instruction.ZeroExtend8, temp2, temp2));
+ node = nodes.AddAfter(node, Operation(Instruction.BitwiseAnd, temp1, temp1, Const(0xff00)));
+ node = nodes.AddAfter(node, Operation(Instruction.BitwiseOr, temp1, temp1, temp2));
+ }
+
+ Operation vinsOp = Operation(Instruction.VectorInsert16, dest, src1, temp1, Const(index >> 1));
+
+ nodes.AddAfter(node, vinsOp);
+
+ Delete(nodes, currentNode);
+ }
+
+ protected static Operand AddXmmCopy(IntrusiveList<Operation> nodes, Operation node, Operand source)
+ {
+ Operand temp = Local(source.Type);
+ Operand intConst = AddCopy(nodes, node, GetIntConst(source));
+
+ Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst);
+
+ nodes.AddBefore(node, copyOp);
+
+ return temp;
+ }
+
+ protected static Operand AddCopy(IntrusiveList<Operation> nodes, Operation node, Operand source)
+ {
+ Operand temp = Local(source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, temp, source);
+
+ nodes.AddBefore(node, copyOp);
+
+ return temp;
+ }
+
+ private static Operand GetIntConst(Operand value)
+ {
+ if (value.Type == OperandType.FP32)
+ {
+ return Const(value.AsInt32());
+ }
+ else if (value.Type == OperandType.FP64)
+ {
+ return Const(value.AsInt64());
+ }
+
+ return value;
+ }
+
+ protected static void Delete(IntrusiveList<Operation> nodes, Operation node)
+ {
+ node.Destination = default;
+
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ node.SetSource(index, default);
+ }
+
+ nodes.Remove(node);
+ }
+
+ protected static Operand Gpr(X86Register register, OperandType type)
+ {
+ return Register((int)register, RegisterType.Integer, type);
+ }
+
+ protected static Operand Xmm(X86Register register, OperandType type)
+ {
+ return Register((int)register, RegisterType.Vector, type);
+ }
+
+ private static bool IsSameOperandDestSrc1(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ return !HardwareCapabilities.SupportsVexEncoding && !operation.Destination.Type.IsInteger();
+ case Instruction.Multiply:
+ case Instruction.Subtract:
+ return !HardwareCapabilities.SupportsVexEncoding || operation.Destination.Type.IsInteger();
+
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseNot:
+ case Instruction.BitwiseOr:
+ case Instruction.ByteSwap:
+ case Instruction.Negate:
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ return true;
+
+ case Instruction.Divide:
+ return !HardwareCapabilities.SupportsVexEncoding && !operation.Destination.Type.IsInteger();
+
+ case Instruction.VectorInsert:
+ case Instruction.VectorInsert16:
+ case Instruction.VectorInsert8:
+ return !HardwareCapabilities.SupportsVexEncoding;
+
+ case Instruction.Extended:
+ return IsIntrinsicSameOperandDestSrc1(operation);
+ }
+
+ return IsVexSameOperandDestSrc1(operation);
+ }
+
+ private static bool IsIntrinsicSameOperandDestSrc1(Operation operation)
+ {
+ IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);
+
+ return info.Type == IntrinsicType.Crc32 || info.Type == IntrinsicType.Fma || IsVexSameOperandDestSrc1(operation);
+ }
+
+ private static bool IsVexSameOperandDestSrc1(Operation operation)
+ {
+ if (IsIntrinsic(operation.Instruction))
+ {
+ IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);
+
+ bool hasVex = HardwareCapabilities.SupportsVexEncoding && Assembler.SupportsVexPrefix(info.Inst);
+
+ bool isUnary = operation.SourcesCount < 2;
+
+ bool hasVecDest = operation.Destination != default && operation.Destination.Type == OperandType.V128;
+
+ return !hasVex && !isUnary && hasVecDest;
+ }
+
+ return false;
+ }
+
+ private static bool HasConstSrc1(Instruction inst)
+ {
+ switch (inst)
+ {
+ case Instruction.Copy:
+ case Instruction.LoadArgument:
+ case Instruction.Spill:
+ case Instruction.SpillArg:
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool HasConstSrc2(Instruction inst)
+ {
+ switch (inst)
+ {
+ case Instruction.Add:
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseOr:
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ case Instruction.Multiply:
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ case Instruction.Store:
+ case Instruction.Store16:
+ case Instruction.Store8:
+ case Instruction.Subtract:
+ case Instruction.VectorExtract:
+ case Instruction.VectorExtract16:
+ case Instruction.VectorExtract8:
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool IsCommutative(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseOr:
+ case Instruction.Multiply:
+ return true;
+
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ {
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var compType = (Comparison)comp.AsInt32();
+
+ return compType == Comparison.Equal || compType == Comparison.NotEqual;
+ }
+ }
+
+ return false;
+ }
+
+ private static bool IsIntrinsic(Instruction inst)
+ {
+ return inst == Instruction.Extended;
+ }
+
+ private static bool IsXmmIntrinsic(Operation operation)
+ {
+ if (operation.Instruction != Instruction.Extended)
+ {
+ return false;
+ }
+
+ IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);
+
+ return info.Type != IntrinsicType.Crc32;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs b/src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs
new file mode 100644
index 00000000..a84d5050
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs
@@ -0,0 +1,334 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class PreAllocatorSystemV : PreAllocator
+ {
+ public static void InsertCallCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operand dest = node.Destination;
+
+ List<Operand> sources = new List<Operand>
+ {
+ node.GetSource(0)
+ };
+
+ int argsCount = node.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ int stackOffset = 0;
+
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = node.GetSource(index + 1);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < intMax;
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ Operand offset = Const(stackOffset);
+
+ Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, spillOp));
+
+ stackOffset += source.Type.GetSizeInBytes();
+ }
+ }
+
+ node.SetSources(sources.ToArray());
+
+ if (dest != default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ Operation operation = node;
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg));
+ nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
+
+ operation.Destination = default;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, dest, retReg);
+
+ nodes.AddAfter(node, copyOp);
+
+ node.Destination = retReg;
+ }
+ }
+ }
+
+ public static void InsertTailcallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node)
+ {
+ List<Operand> sources = new List<Operand>
+ {
+ node.GetSource(0)
+ };
+
+ int argsCount = node.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = node.GetSource(1 + index);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+ }
+ }
+
+ // The target address must be in the return register, since we
+ // don't return anything and it is guaranteed not to be a
+ // callee-saved register (which would be trashed by the epilogue).
+ Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+
+ Operation addrCopyOp = Operation(Instruction.Copy, retReg, node.GetSource(0));
+
+ nodes.AddBefore(node, addrCopyOp);
+
+ sources[0] = retReg;
+
+ node.SetSources(sources.ToArray());
+ }
+
+ public static Operation InsertLoadArgumentCopy(
+ CompilerContext cctx,
+ ref Span<Operation> buffer,
+ IntrusiveList<Operation> nodes,
+ Operand[] preservedArgs,
+ Operation node)
+ {
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+ int index = source.AsInt32();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ for (int cIndex = 0; cIndex < index; cIndex++)
+ {
+ OperandType argType = cctx.FuncArgTypes[cIndex];
+
+ if (argType.IsInteger())
+ {
+ intCount++;
+ }
+ else if (argType == OperandType.V128)
+ {
+ intCount += 2;
+ }
+ else
+ {
+ vecCount++;
+ }
+ }
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < CallingConvention.GetIntArgumentsOnRegsCount();
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < CallingConvention.GetIntArgumentsOnRegsCount();
+ }
+ else
+ {
+ passOnReg = vecCount < CallingConvention.GetVecArgumentsOnRegsCount();
+ }
+
+ if (passOnReg)
+ {
+ Operand dest = node.Destination;
+
+ if (preservedArgs[index] == default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
+ Operand pArg = Local(OperandType.V128);
+
+ Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
+ Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
+
+ Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg);
+ Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyH);
+ cctx.Cfg.Entry.Operations.AddFirst(copyL);
+
+ preservedArgs[index] = pArg;
+ }
+ else
+ {
+ Operand pArg = Local(dest.Type);
+
+ Operand argReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[index] = pArg;
+ }
+ }
+
+ Operation nextNode;
+
+ if (dest.AssignmentsCount == 1)
+ {
+ // Let's propagate the argument if we can to avoid copies.
+ PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]);
+ nextNode = node.ListNext;
+ }
+ else
+ {
+ Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]);
+ nextNode = nodes.AddBefore(node, argCopyOp);
+ }
+
+ Delete(nodes, node);
+ return nextNode;
+ }
+ else
+ {
+ // TODO: Pass on stack.
+ return node;
+ }
+ }
+
+ public static void InsertReturnCopy(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand source = node.GetSource(0);
+
+ if (source.Type == OperandType.V128)
+ {
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1)));
+ }
+ else
+ {
+ Operand retReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), source.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
+
+ Operation retCopyOp = Operation(Instruction.Copy, retReg, source);
+
+ nodes.AddBefore(node, retCopyOp);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs b/src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs
new file mode 100644
index 00000000..45319e6a
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs
@@ -0,0 +1,327 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class PreAllocatorWindows : PreAllocator
+ {
+ public static void InsertCallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node)
+ {
+ Operand dest = node.Destination;
+
+ // Handle struct arguments.
+ int retArgs = 0;
+ int stackAllocOffset = 0;
+
+ int AllocateOnStack(int size)
+ {
+ // We assume that the stack allocator is initially empty (TotalSize = 0).
+ // Taking that into account, we can reuse the space allocated for other
+ // calls by keeping track of our own allocated size (stackAllocOffset).
+ // If the space allocated is not big enough, then we just expand it.
+ int offset = stackAllocOffset;
+
+ if (stackAllocOffset + size > stackAlloc.TotalSize)
+ {
+ stackAlloc.Allocate((stackAllocOffset + size) - stackAlloc.TotalSize);
+ }
+
+ stackAllocOffset += size;
+
+ return offset;
+ }
+
+ Operand arg0Reg = default;
+
+ if (dest != default && dest.Type == OperandType.V128)
+ {
+ int stackOffset = AllocateOnStack(dest.Type.GetSizeInBytes());
+
+ arg0Reg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+ Operation allocOp = Operation(Instruction.StackAlloc, arg0Reg, Const(stackOffset));
+
+ nodes.AddBefore(node, allocOp);
+
+ retArgs = 1;
+ }
+
+ int argsCount = node.SourcesCount - 1;
+ int maxArgs = CallingConvention.GetArgumentsOnRegsCount() - retArgs;
+
+ if (argsCount > maxArgs)
+ {
+ argsCount = maxArgs;
+ }
+
+ Operand[] sources = new Operand[1 + retArgs + argsCount];
+
+ sources[0] = node.GetSource(0);
+
+ if (arg0Reg != default)
+ {
+ sources[1] = arg0Reg;
+ }
+
+ for (int index = 1; index < node.SourcesCount; index++)
+ {
+ Operand source = node.GetSource(index);
+
+ if (source.Type == OperandType.V128)
+ {
+ Operand stackAddr = Local(OperandType.I64);
+
+ int stackOffset = AllocateOnStack(source.Type.GetSizeInBytes());
+
+ nodes.AddBefore(node, Operation(Instruction.StackAlloc, stackAddr, Const(stackOffset)));
+
+ Operation storeOp = Operation(Instruction.Store, default, stackAddr, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, storeOp));
+
+ node.SetSource(index, stackAddr);
+ }
+ }
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = node.GetSource(index + 1);
+ Operand argReg;
+
+ int argIndex = index + retArgs;
+
+ if (source.Type.IsInteger())
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(argIndex), source.Type);
+ }
+ else
+ {
+ argReg = Xmm(CallingConvention.GetVecArgumentRegister(argIndex), source.Type);
+ }
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));
+
+ sources[1 + retArgs + index] = argReg;
+ }
+
+ // The remaining arguments (those that are not passed on registers)
+ // should be passed on the stack; we write them to the stack with "SpillArg".
+ for (int index = argsCount; index < node.SourcesCount - 1; index++)
+ {
+ Operand source = node.GetSource(index + 1);
+ Operand offset = Const((index + retArgs) * 8);
+
+ Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, spillOp));
+ }
+
+ if (dest != default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ Operand retValueAddr = Local(OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, retValueAddr, arg0Reg));
+
+ Operation loadOp = Operation(Instruction.Load, dest, retValueAddr);
+
+ nodes.AddAfter(node, loadOp);
+
+ node.Destination = default;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, dest, retReg);
+
+ nodes.AddAfter(node, copyOp);
+
+ node.Destination = retReg;
+ }
+ }
+
+ node.SetSources(sources);
+ }
+
+ public static void InsertTailcallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node)
+ {
+ int argsCount = node.SourcesCount - 1;
+ int maxArgs = CallingConvention.GetArgumentsOnRegsCount();
+
+ if (argsCount > maxArgs)
+ {
+ throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+ }
+
+ Operand[] sources = new Operand[1 + argsCount];
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = node.GetSource(1 + index);
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(index), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(index), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));
+
+ sources[1 + index] = argReg;
+ }
+
+ // The target address must be in the return register, since we
+ // don't return anything and it is guaranteed not to be a
+ // callee-saved register (which would be trashed by the epilogue).
+ Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+
+ Operation addrCopyOp = Operation(Instruction.Copy, retReg, node.GetSource(0));
+
+ nodes.AddBefore(node, addrCopyOp);
+
+ sources[0] = retReg;
+
+ node.SetSources(sources);
+ }
+
+ public static Operation InsertLoadArgumentCopy(
+ CompilerContext cctx,
+ ref Span<Operation> buffer,
+ IntrusiveList<Operation> nodes,
+ Operand[] preservedArgs,
+ Operation node)
+ {
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+ int retArgs = cctx.FuncReturnType == OperandType.V128 ? 1 : 0;
+
+ int index = source.AsInt32() + retArgs;
+
+ if (index < CallingConvention.GetArgumentsOnRegsCount())
+ {
+ Operand dest = node.Destination;
+
+ if (preservedArgs[index] == default)
+ {
+ Operand argReg, pArg;
+
+ if (dest.Type.IsInteger())
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), dest.Type);
+ pArg = Local(dest.Type);
+ }
+ else if (dest.Type == OperandType.V128)
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), OperandType.I64);
+ pArg = Local(OperandType.I64);
+ }
+ else
+ {
+ argReg = Xmm(CallingConvention.GetVecArgumentRegister(index), dest.Type);
+ pArg = Local(dest.Type);
+ }
+
+ Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[index] = pArg;
+ }
+
+ Operation nextNode;
+
+ if (dest.Type != OperandType.V128 && dest.AssignmentsCount == 1)
+ {
+ // Let's propagate the argument if we can to avoid copies.
+ PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]);
+ nextNode = node.ListNext;
+ }
+ else
+ {
+ Operation argCopyOp = Operation(dest.Type == OperandType.V128
+ ? Instruction.Load
+ : Instruction.Copy, dest, preservedArgs[index]);
+
+ nextNode = nodes.AddBefore(node, argCopyOp);
+ }
+
+ Delete(nodes, node);
+ return nextNode;
+ }
+ else
+ {
+ // TODO: Pass on stack.
+ return node;
+ }
+ }
+
+ public static void InsertReturnCopy(
+ CompilerContext cctx,
+ IntrusiveList<Operation> nodes,
+ Operand[] preservedArgs,
+ Operation node)
+ {
+ if (node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand source = node.GetSource(0);
+ Operand retReg;
+
+ if (source.Type.IsInteger())
+ {
+ retReg = Gpr(CallingConvention.GetIntReturnRegister(), source.Type);
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ if (preservedArgs[0] == default)
+ {
+ Operand preservedArg = Local(OperandType.I64);
+ Operand arg0 = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+ Operation copyOp = Operation(Instruction.Copy, preservedArg, arg0);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[0] = preservedArg;
+ }
+
+ retReg = preservedArgs[0];
+ }
+ else
+ {
+ retReg = Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
+ }
+
+ if (source.Type == OperandType.V128)
+ {
+ Operation retStoreOp = Operation(Instruction.Store, default, retReg, source);
+
+ nodes.AddBefore(node, retStoreOp);
+ }
+ else
+ {
+ Operation retCopyOp = Operation(Instruction.Copy, retReg, source);
+
+ nodes.AddBefore(node, retCopyOp);
+ }
+
+ node.SetSources(Array.Empty<Operand>());
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/X86Condition.cs b/src/ARMeilleure/CodeGen/X86/X86Condition.cs
new file mode 100644
index 00000000..c82cbdec
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/X86Condition.cs
@@ -0,0 +1,47 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ enum X86Condition
+ {
+ Overflow = 0x0,
+ NotOverflow = 0x1,
+ Below = 0x2,
+ AboveOrEqual = 0x3,
+ Equal = 0x4,
+ NotEqual = 0x5,
+ BelowOrEqual = 0x6,
+ Above = 0x7,
+ Sign = 0x8,
+ NotSign = 0x9,
+ ParityEven = 0xa,
+ ParityOdd = 0xb,
+ Less = 0xc,
+ GreaterOrEqual = 0xd,
+ LessOrEqual = 0xe,
+ Greater = 0xf
+ }
+
+ static class ComparisonX86Extensions
+ {
+ public static X86Condition ToX86Condition(this Comparison comp)
+ {
+ return comp switch
+ {
+ Comparison.Equal => X86Condition.Equal,
+ Comparison.NotEqual => X86Condition.NotEqual,
+ Comparison.Greater => X86Condition.Greater,
+ Comparison.LessOrEqual => X86Condition.LessOrEqual,
+ Comparison.GreaterUI => X86Condition.Above,
+ Comparison.LessOrEqualUI => X86Condition.BelowOrEqual,
+ Comparison.GreaterOrEqual => X86Condition.GreaterOrEqual,
+ Comparison.Less => X86Condition.Less,
+ Comparison.GreaterOrEqualUI => X86Condition.AboveOrEqual,
+ Comparison.LessUI => X86Condition.Below,
+
+ _ => throw new ArgumentException(null, nameof(comp))
+ };
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/X86Instruction.cs b/src/ARMeilleure/CodeGen/X86/X86Instruction.cs
new file mode 100644
index 00000000..9a85c516
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -0,0 +1,231 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum X86Instruction
+ {
+ None,
+ Add,
+ Addpd,
+ Addps,
+ Addsd,
+ Addss,
+ Aesdec,
+ Aesdeclast,
+ Aesenc,
+ Aesenclast,
+ Aesimc,
+ And,
+ Andnpd,
+ Andnps,
+ Andpd,
+ Andps,
+ Blendvpd,
+ Blendvps,
+ Bsr,
+ Bswap,
+ Call,
+ Cmovcc,
+ Cmp,
+ Cmppd,
+ Cmpps,
+ Cmpsd,
+ Cmpss,
+ Cmpxchg,
+ Cmpxchg16b,
+ Cmpxchg8,
+ Comisd,
+ Comiss,
+ Crc32,
+ Crc32_16,
+ Crc32_8,
+ Cvtdq2pd,
+ Cvtdq2ps,
+ Cvtpd2dq,
+ Cvtpd2ps,
+ Cvtps2dq,
+ Cvtps2pd,
+ Cvtsd2si,
+ Cvtsd2ss,
+ Cvtsi2sd,
+ Cvtsi2ss,
+ Cvtss2sd,
+ Cvtss2si,
+ Div,
+ Divpd,
+ Divps,
+ Divsd,
+ Divss,
+ Gf2p8affineqb,
+ Haddpd,
+ Haddps,
+ Idiv,
+ Imul,
+ Imul128,
+ Insertps,
+ Jmp,
+ Ldmxcsr,
+ Lea,
+ Maxpd,
+ Maxps,
+ Maxsd,
+ Maxss,
+ Minpd,
+ Minps,
+ Minsd,
+ Minss,
+ Mov,
+ Mov16,
+ Mov8,
+ Movd,
+ Movdqu,
+ Movhlps,
+ Movlhps,
+ Movq,
+ Movsd,
+ Movss,
+ Movsx16,
+ Movsx32,
+ Movsx8,
+ Movzx16,
+ Movzx8,
+ Mul128,
+ Mulpd,
+ Mulps,
+ Mulsd,
+ Mulss,
+ Neg,
+ Not,
+ Or,
+ Paddb,
+ Paddd,
+ Paddq,
+ Paddw,
+ Palignr,
+ Pand,
+ Pandn,
+ Pavgb,
+ Pavgw,
+ Pblendvb,
+ Pclmulqdq,
+ Pcmpeqb,
+ Pcmpeqd,
+ Pcmpeqq,
+ Pcmpeqw,
+ Pcmpgtb,
+ Pcmpgtd,
+ Pcmpgtq,
+ Pcmpgtw,
+ Pextrb,
+ Pextrd,
+ Pextrq,
+ Pextrw,
+ Pinsrb,
+ Pinsrd,
+ Pinsrq,
+ Pinsrw,
+ Pmaxsb,
+ Pmaxsd,
+ Pmaxsw,
+ Pmaxub,
+ Pmaxud,
+ Pmaxuw,
+ Pminsb,
+ Pminsd,
+ Pminsw,
+ Pminub,
+ Pminud,
+ Pminuw,
+ Pmovsxbw,
+ Pmovsxdq,
+ Pmovsxwd,
+ Pmovzxbw,
+ Pmovzxdq,
+ Pmovzxwd,
+ Pmulld,
+ Pmullw,
+ Pop,
+ Popcnt,
+ Por,
+ Pshufb,
+ Pshufd,
+ Pslld,
+ Pslldq,
+ Psllq,
+ Psllw,
+ Psrad,
+ Psraw,
+ Psrld,
+ Psrlq,
+ Psrldq,
+ Psrlw,
+ Psubb,
+ Psubd,
+ Psubq,
+ Psubw,
+ Punpckhbw,
+ Punpckhdq,
+ Punpckhqdq,
+ Punpckhwd,
+ Punpcklbw,
+ Punpckldq,
+ Punpcklqdq,
+ Punpcklwd,
+ Push,
+ Pxor,
+ Rcpps,
+ Rcpss,
+ Ror,
+ Roundpd,
+ Roundps,
+ Roundsd,
+ Roundss,
+ Rsqrtps,
+ Rsqrtss,
+ Sar,
+ Setcc,
+ Sha256Msg1,
+ Sha256Msg2,
+ Sha256Rnds2,
+ Shl,
+ Shr,
+ Shufpd,
+ Shufps,
+ Sqrtpd,
+ Sqrtps,
+ Sqrtsd,
+ Sqrtss,
+ Stmxcsr,
+ Sub,
+ Subpd,
+ Subps,
+ Subsd,
+ Subss,
+ Test,
+ Unpckhpd,
+ Unpckhps,
+ Unpcklpd,
+ Unpcklps,
+ Vblendvpd,
+ Vblendvps,
+ Vcvtph2ps,
+ Vcvtps2ph,
+ Vfmadd231pd,
+ Vfmadd231ps,
+ Vfmadd231sd,
+ Vfmadd231ss,
+ Vfmsub231sd,
+ Vfmsub231ss,
+ Vfnmadd231pd,
+ Vfnmadd231ps,
+ Vfnmadd231sd,
+ Vfnmadd231ss,
+ Vfnmsub231sd,
+ Vfnmsub231ss,
+ Vpblendvb,
+ Vpternlogd,
+ Xor,
+ Xorpd,
+ Xorps,
+
+ Count
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/X86Optimizer.cs b/src/ARMeilleure/CodeGen/X86/X86Optimizer.cs
new file mode 100644
index 00000000..98a19b9a
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/X86Optimizer.cs
@@ -0,0 +1,259 @@
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class X86Optimizer
+ {
+ private const int MaxConstantUses = 10000;
+
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+ var constants = new Dictionary<ulong, Operand>();
+
+ Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
+ {
+ // If the constant has many uses, we also force a new constant mov to be added,
+ // to avoid overflowing the uses count field (which is limited to 16 bits).
+ if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses)
+ {
+ constant = Local(source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, constant, source);
+
+ block.Operations.AddBefore(operation, copyOp);
+
+ constants[source.Value] = constant;
+ }
+
+ return constant;
+ }
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ constants.Clear();
+
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ // Insert copies for constants that can't fit in a 32-bit immediate.
+ // Doing this early unblocks a few optimizations.
+ if (node.Instruction == Instruction.Add)
+ {
+ Operand src1 = node.GetSource(0);
+ Operand src2 = node.GetSource(1);
+
+ if (src1.Kind == OperandKind.Constant && (src1.Relocatable || CodeGenCommon.IsLongConst(src1)))
+ {
+ node.SetSource(0, GetConstantCopy(block, node, src1));
+ }
+
+ if (src2.Kind == OperandKind.Constant && (src2.Relocatable || CodeGenCommon.IsLongConst(src2)))
+ {
+ node.SetSource(1, GetConstantCopy(block, node, src2));
+ }
+ }
+
+ // Try to fold something like:
+ // shl rbx, 2
+ // add rax, rbx
+ // add rax, 0xcafe
+ // mov rax, [rax]
+ // Into:
+ // mov rax, [rax+rbx*4+0xcafe]
+ if (IsMemoryLoadOrStore(node.Instruction))
+ {
+ OperandType type;
+
+ if (node.Destination != default)
+ {
+ type = node.Destination.Type;
+ }
+ else
+ {
+ type = node.GetSource(1).Type;
+ }
+
+ Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type);
+
+ if (memOp != default)
+ {
+ node.SetSource(0, memOp);
+ }
+ }
+ }
+ }
+
+ Optimizer.RemoveUnusedNodes(cfg);
+ }
+
+ private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
+ {
+ Operand baseOp = addr;
+
+ // First we check if the address is the result of adding a local X and a 32-bit
+ // immediate. If that is the case, then baseOp is X, and the memory operand
+ // immediate becomes the addition immediate. Otherwise, baseOp remains the address.
+ int imm = GetConstOp(ref baseOp);
+
+ // Now we check if the baseOp is the result of a local Y with a local Z addition.
+ // If that is the case, we now set baseOp to Y and indexOp to Z. We further check
+ // if Z is the result of a left shift of local W by a value >= 0 and <= 3, if that
+ // is the case, we set indexOp to W and adjust the scale value of the memory operand
+ // to match that of the left shift.
+ // There is one missed case, which is the address being a shift result, but this is
+ // probably not worth optimizing as it should never happen.
+ (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp);
+
+ // If baseOp is still equal to address, then there's nothing that can be optimized.
+ if (baseOp == addr)
+ {
+ return default;
+ }
+
+ if (imm == 0 && scale == Multiplier.x1 && indexOp != default)
+ {
+ imm = GetConstOp(ref indexOp);
+ }
+
+ return MemoryOp(type, baseOp, indexOp, scale, imm);
+ }
+
+ private static int GetConstOp(ref Operand baseOp)
+ {
+ Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+ if (operation == default)
+ {
+ return 0;
+ }
+
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ Operand constOp;
+ Operand otherOp;
+
+ if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable)
+ {
+ constOp = src1;
+ otherOp = src2;
+ }
+ else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant)
+ {
+ constOp = src2;
+ otherOp = src1;
+ }
+ else
+ {
+ return 0;
+ }
+
+ // If we have an addition by a 64-bit constant, then we can't optimize it further,
+ // as a 64-bit immediate can't be encoded in the memory operand.
+ if (CodeGenCommon.IsLongConst(constOp))
+ {
+ return 0;
+ }
+
+ baseOp = otherOp;
+
+ return constOp.AsInt32();
+ }
+
+ private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp)
+ {
+ Operand indexOp = default;
+
+ Multiplier scale = Multiplier.x1;
+
+ Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+ if (addOp == default)
+ {
+ return (indexOp, scale);
+ }
+
+ Operand src1 = addOp.GetSource(0);
+ Operand src2 = addOp.GetSource(1);
+
+ if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable)
+ {
+ return (indexOp, scale);
+ }
+
+ baseOp = src1;
+ indexOp = src2;
+
+ Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft);
+
+ bool indexOnSrc2 = false;
+
+ if (shlOp == default)
+ {
+ shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft);
+
+ indexOnSrc2 = true;
+ }
+
+ if (shlOp != default)
+ {
+ Operand shSrc = shlOp.GetSource(0);
+ Operand shift = shlOp.GetSource(1);
+
+ if (shSrc.Kind == OperandKind.LocalVariable && shift.Kind == OperandKind.Constant && shift.Value <= 3)
+ {
+ scale = shift.Value switch
+ {
+ 1 => Multiplier.x2,
+ 2 => Multiplier.x4,
+ 3 => Multiplier.x8,
+ _ => Multiplier.x1
+ };
+
+ baseOp = indexOnSrc2 ? src1 : src2;
+ indexOp = shSrc;
+ }
+ }
+
+ return (indexOp, scale);
+ }
+
+ private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
+ {
+ // If we have multiple assignments, folding is not safe
+ // as the value may be different depending on the
+ // control flow path.
+ if (op.AssignmentsCount != 1)
+ {
+ return default;
+ }
+
+ Operation asgOp = op.Assignments[0];
+
+ if (asgOp.Instruction != inst)
+ {
+ return default;
+ }
+
+ return asgOp;
+ }
+
+ private static bool IsMemoryLoadOrStore(Instruction inst)
+ {
+ return inst == Instruction.Load ||
+ inst == Instruction.Load16 ||
+ inst == Instruction.Load8 ||
+ inst == Instruction.Store ||
+ inst == Instruction.Store16 ||
+ inst == Instruction.Store8;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/X86Register.cs b/src/ARMeilleure/CodeGen/X86/X86Register.cs
new file mode 100644
index 00000000..01f63e31
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/X86Register.cs
@@ -0,0 +1,41 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum X86Register
+ {
+ Invalid = -1,
+
+ Rax = 0,
+ Rcx = 1,
+ Rdx = 2,
+ Rbx = 3,
+ Rsp = 4,
+ Rbp = 5,
+ Rsi = 6,
+ Rdi = 7,
+ R8 = 8,
+ R9 = 9,
+ R10 = 10,
+ R11 = 11,
+ R12 = 12,
+ R13 = 13,
+ R14 = 14,
+ R15 = 15,
+
+ Xmm0 = 0,
+ Xmm1 = 1,
+ Xmm2 = 2,
+ Xmm3 = 3,
+ Xmm4 = 4,
+ Xmm5 = 5,
+ Xmm6 = 6,
+ Xmm7 = 7,
+ Xmm8 = 8,
+ Xmm9 = 9,
+ Xmm10 = 10,
+ Xmm11 = 11,
+ Xmm12 = 12,
+ Xmm13 = 13,
+ Xmm14 = 14,
+ Xmm15 = 15
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Common/AddressTable.cs b/src/ARMeilleure/Common/AddressTable.cs
new file mode 100644
index 00000000..9db2d00d
--- /dev/null
+++ b/src/ARMeilleure/Common/AddressTable.cs
@@ -0,0 +1,252 @@
+using ARMeilleure.Diagnostics;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Common
+{
+ /// <summary>
+ /// Represents a table mapping guest addresses to values.
+ /// </summary>
+ /// <typeparam name="TEntry">Type of the value</typeparam>
+ unsafe class AddressTable<TEntry> : IDisposable where TEntry : unmanaged
+ {
+ /// <summary>
+ /// Represents a level in an <see cref="AddressTable{TEntry}"/>.
+ /// </summary>
+ public readonly struct Level
+ {
+ /// <summary>
+ /// Gets the index of the <see cref="Level"/> in the guest address.
+ /// </summary>
+ public int Index { get; }
+
+ /// <summary>
+ /// Gets the length of the <see cref="Level"/> in the guest address.
+ /// </summary>
+ public int Length { get; }
+
+ /// <summary>
+ /// Gets the mask which masks the bits used by the <see cref="Level"/>.
+ /// </summary>
+ public ulong Mask => ((1ul << Length) - 1) << Index;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Level"/> structure with the specified
+ /// <paramref name="index"/> and <paramref name="length"/>.
+ /// </summary>
+ /// <param name="index">Index of the <see cref="Level"/></param>
+ /// <param name="length">Length of the <see cref="Level"/></param>
+ public Level(int index, int length)
+ {
+ (Index, Length) = (index, length);
+ }
+
+ /// <summary>
+ /// Gets the value of the <see cref="Level"/> from the specified guest <paramref name="address"/>.
+ /// </summary>
+ /// <param name="address">Guest address</param>
+ /// <returns>Value of the <see cref="Level"/> from the specified guest <paramref name="address"/></returns>
+ public int GetValue(ulong address)
+ {
+ return (int)((address & Mask) >> Index);
+ }
+ }
+
+ private bool _disposed;
+ private TEntry** _table;
+ private readonly List<IntPtr> _pages;
+
+ /// <summary>
+ /// Gets the bits used by the <see cref="Levels"/> of the <see cref="AddressTable{TEntry}"/> instance.
+ /// </summary>
+ public ulong Mask { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Level"/>s used by the <see cref="AddressTable{TEntry}"/> instance.
+ /// </summary>
+ public Level[] Levels { get; }
+
+ /// <summary>
+ /// Gets or sets the default fill value of newly created leaf pages.
+ /// </summary>
+ public TEntry Fill { get; set; }
+
+ /// <summary>
+ /// Gets the base address of the <see cref="AddressTable{TEntry}"/>.
+ /// </summary>
+ /// <exception cref="ObjectDisposedException"><see cref="AddressTable{TEntry}"/> instance was disposed</exception>
+ public IntPtr Base
+ {
+ get
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ lock (_pages)
+ {
+ return (IntPtr)GetRootPage();
+ }
+ }
+ }
+
+ /// <summary>
+ /// Constructs a new instance of the <see cref="AddressTable{TEntry}"/> class with the specified list of
+ /// <see cref="Level"/>.
+ /// </summary>
+ /// <exception cref="ArgumentNullException"><paramref name="levels"/> is null</exception>
+ /// <exception cref="ArgumentException">Length of <paramref name="levels"/> is less than 2</exception>
+ public AddressTable(Level[] levels)
+ {
+ ArgumentNullException.ThrowIfNull(levels);
+
+ if (levels.Length < 2)
+ {
+ throw new ArgumentException("Table must be at least 2 levels deep.", nameof(levels));
+ }
+
+ _pages = new List<IntPtr>(capacity: 16);
+
+ Levels = levels;
+ Mask = 0;
+
+ foreach (var level in Levels)
+ {
+ Mask |= level.Mask;
+ }
+ }
+
+ /// <summary>
+ /// Determines if the specified <paramref name="address"/> is in the range of the
+ /// <see cref="AddressTable{TEntry}"/>.
+ /// </summary>
+ /// <param name="address">Guest address</param>
+ /// <returns><see langword="true"/> if is valid; otherwise <see langword="false"/></returns>
+ public bool IsValid(ulong address)
+ {
+ return (address & ~Mask) == 0;
+ }
+
+ /// <summary>
+ /// Gets a reference to the value at the specified guest <paramref name="address"/>.
+ /// </summary>
+ /// <param name="address">Guest address</param>
+ /// <returns>Reference to the value at the specified guest <paramref name="address"/></returns>
+ /// <exception cref="ObjectDisposedException"><see cref="AddressTable{TEntry}"/> instance was disposed</exception>
+ /// <exception cref="ArgumentException"><paramref name="address"/> is not mapped</exception>
+ public ref TEntry GetValue(ulong address)
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ if (!IsValid(address))
+ {
+ throw new ArgumentException($"Address 0x{address:X} is not mapped onto the table.", nameof(address));
+ }
+
+ lock (_pages)
+ {
+ return ref GetPage(address)[Levels[^1].GetValue(address)];
+ }
+ }
+
+ /// <summary>
+ /// Gets the leaf page for the specified guest <paramref name="address"/>.
+ /// </summary>
+ /// <param name="address">Guest address</param>
+ /// <returns>Leaf page for the specified guest <paramref name="address"/></returns>
+ private TEntry* GetPage(ulong address)
+ {
+ TEntry** page = GetRootPage();
+
+ for (int i = 0; i < Levels.Length - 1; i++)
+ {
+ ref Level level = ref Levels[i];
+ ref TEntry* nextPage = ref page[level.GetValue(address)];
+
+ if (nextPage == null)
+ {
+ ref Level nextLevel = ref Levels[i + 1];
+
+ nextPage = i == Levels.Length - 2 ?
+ (TEntry*)Allocate(1 << nextLevel.Length, Fill, leaf: true) :
+ (TEntry*)Allocate(1 << nextLevel.Length, IntPtr.Zero, leaf: false);
+ }
+
+ page = (TEntry**)nextPage;
+ }
+
+ return (TEntry*)page;
+ }
+
+ /// <summary>
+ /// Lazily initializes and gets the root page of the <see cref="AddressTable{TEntry}"/>.
+ /// </summary>
+ /// <returns>Root page of the <see cref="AddressTable{TEntry}"/></returns>
+ private TEntry** GetRootPage()
+ {
+ if (_table == null)
+ {
+ _table = (TEntry**)Allocate(1 << Levels[0].Length, fill: IntPtr.Zero, leaf: false);
+ }
+
+ return _table;
+ }
+
+ /// <summary>
+ /// Allocates a block of memory of the specified type and length.
+ /// </summary>
+ /// <typeparam name="T">Type of elements</typeparam>
+ /// <param name="length">Number of elements</param>
+ /// <param name="fill">Fill value</param>
+ /// <param name="leaf"><see langword="true"/> if leaf; otherwise <see langword="false"/></param>
+ /// <returns>Allocated block</returns>
+ private IntPtr Allocate<T>(int length, T fill, bool leaf) where T : unmanaged
+ {
+ var size = sizeof(T) * length;
+ var page = (IntPtr)NativeAllocator.Instance.Allocate((uint)size);
+ var span = new Span<T>((void*)page, length);
+
+ span.Fill(fill);
+
+ _pages.Add(page);
+
+ TranslatorEventSource.Log.AddressTableAllocated(size, leaf);
+
+ return page;
+ }
+
+ /// <summary>
+ /// Releases all resources used by the <see cref="AddressTable{TEntry}"/> instance.
+ /// </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+
+ /// <summary>
+ /// Releases all unmanaged and optionally managed resources used by the <see cref="AddressTable{TEntry}"/>
+ /// instance.
+ /// </summary>
+ /// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resources</param>
+ protected virtual void Dispose(bool disposing)
+ {
+ if (!_disposed)
+ {
+ foreach (var page in _pages)
+ {
+ Marshal.FreeHGlobal(page);
+ }
+
+ _disposed = true;
+ }
+ }
+
+ /// <summary>
+ /// Frees resources used by the <see cref="AddressTable{TEntry}"/> instance.
+ /// </summary>
+ ~AddressTable()
+ {
+ Dispose(false);
+ }
+ }
+}
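
A hedged usage sketch for AddressTable: two levels covering a 32-bit guest address space, 16 bits each. The level split and the fill value are illustrative; real callers pick levels to match their guest address layout.

    using var table = new AddressTable<ulong>(new[]
    {
        new AddressTable<ulong>.Level(16, 16), // bits 31..16 index the root page.
        new AddressTable<ulong>.Level(0, 16),  // bits 15..0 index the leaf page.
    })
    {
        Fill = ulong.MaxValue, // default value for entries in new leaf pages.
    };

    ref ulong entry = ref table.GetValue(0x12345678ul); // lazily allocates pages.
    entry = 42;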
diff --git a/src/ARMeilleure/Common/Allocator.cs b/src/ARMeilleure/Common/Allocator.cs
new file mode 100644
index 00000000..247a8e8b
--- /dev/null
+++ b/src/ARMeilleure/Common/Allocator.cs
@@ -0,0 +1,24 @@
+using System;
+
+namespace ARMeilleure.Common
+{
+ unsafe abstract class Allocator : IDisposable
+ {
+ public T* Allocate<T>(ulong count = 1) where T : unmanaged
+ {
+ return (T*)Allocate(count * (uint)sizeof(T));
+ }
+
+ public abstract void* Allocate(ulong size);
+
+ public abstract void Free(void* block);
+
+ protected virtual void Dispose(bool disposing) { }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Common/ArenaAllocator.cs b/src/ARMeilleure/Common/ArenaAllocator.cs
new file mode 100644
index 00000000..bce6794a
--- /dev/null
+++ b/src/ARMeilleure/Common/ArenaAllocator.cs
@@ -0,0 +1,187 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure.Common
+{
+ unsafe sealed class ArenaAllocator : Allocator
+ {
+ private class PageInfo
+ {
+ public byte* Pointer;
+ public byte Unused;
+ public int UnusedCounter;
+ }
+
+ private int _lastReset;
+ private ulong _index;
+ private int _pageIndex;
+ private PageInfo _page;
+ private List<PageInfo> _pages;
+ private readonly ulong _pageSize;
+ private readonly uint _pageCount;
+ private readonly List<IntPtr> _extras;
+
+ public ArenaAllocator(uint pageSize, uint pageCount)
+ {
+ _lastReset = Environment.TickCount;
+
+ // Set _index to pageSize so that the first allocation goes through the slow path.
+ _index = pageSize;
+ _pageIndex = -1;
+
+ _page = null;
+ _pages = new List<PageInfo>();
+ _pageSize = pageSize;
+ _pageCount = pageCount;
+
+ _extras = new List<IntPtr>();
+ }
+
+ public Span<T> AllocateSpan<T>(ulong count) where T : unmanaged
+ {
+ return new Span<T>(Allocate<T>(count), (int)count);
+ }
+
+ public override void* Allocate(ulong size)
+ {
+ if (_index + size <= _pageSize)
+ {
+ byte* result = _page.Pointer + _index;
+
+ _index += size;
+
+ return result;
+ }
+
+ return AllocateSlow(size);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private void* AllocateSlow(ulong size)
+ {
+ if (size > _pageSize)
+ {
+ void* extra = NativeAllocator.Instance.Allocate(size);
+
+ _extras.Add((IntPtr)extra);
+
+ return extra;
+ }
+
+ if (_index + size > _pageSize)
+ {
+ _index = 0;
+ _pageIndex++;
+ }
+
+ if (_pageIndex < _pages.Count)
+ {
+ _page = _pages[_pageIndex];
+ _page.Unused = 0;
+ }
+ else
+ {
+ _page = new PageInfo();
+ _page.Pointer = (byte*)NativeAllocator.Instance.Allocate(_pageSize);
+
+ _pages.Add(_page);
+ }
+
+ byte* result = _page.Pointer + _index;
+
+ _index += size;
+
+ return result;
+ }
+
+ public override void Free(void* block) { }
+
+ public void Reset()
+ {
+ _index = _pageSize;
+ _pageIndex = -1;
+ _page = null;
+
+ // Free excess pages that were allocated.
+ while (_pages.Count > _pageCount)
+ {
+ NativeAllocator.Instance.Free(_pages[_pages.Count - 1].Pointer);
+
+ _pages.RemoveAt(_pages.Count - 1);
+ }
+
+ // Free extra blocks that are not page-sized.
+ foreach (IntPtr ptr in _extras)
+ {
+ NativeAllocator.Instance.Free((void*)ptr);
+ }
+
+ _extras.Clear();
+
+ // Free pooled pages that have not been used in a while. Remove pages at the back first, because we try to
+ // keep the pages at the front alive, since they're more likely to be hot and in the d-cache.
+ bool removing = true;
+
+ // If arena is used frequently, keep pages for longer. Otherwise keep pages for a shorter amount of time.
+ int now = Environment.TickCount;
+ int count = (now - _lastReset) switch
+ {
+ >= 5000 => 0,
+ >= 2500 => 50,
+ >= 1000 => 100,
+ >= 10 => 1500,
+ _ => 5000
+ };
+
+ for (int i = _pages.Count - 1; i >= 0; i--)
+ {
+ PageInfo page = _pages[i];
+
+ if (page.Unused == 0)
+ {
+ page.UnusedCounter = 0;
+ }
+
+ page.UnusedCounter += page.Unused;
+ page.Unused = 1;
+
+ // If the page has not been used for `count` resets, remove it.
+ if (removing && page.UnusedCounter >= count)
+ {
+ NativeAllocator.Instance.Free(page.Pointer);
+
+ _pages.RemoveAt(i);
+ }
+ else
+ {
+ removing = false;
+ }
+ }
+
+ _lastReset = now;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (_pages != null)
+ {
+ foreach (PageInfo info in _pages)
+ {
+ NativeAllocator.Instance.Free(info.Pointer);
+ }
+
+ foreach (IntPtr ptr in _extras)
+ {
+ NativeAllocator.Instance.Free((void*)ptr);
+ }
+
+ _pages = null;
+ }
+ }
+
+ ~ArenaAllocator()
+ {
+ Dispose(false);
+ }
+ }
+}
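
A sketch of the intended ArenaAllocator lifecycle, assuming one arena reused across many short-lived compilations: allocations are pointer bumps, and Reset returns everything at once instead of freeing block by block.

    using var arena = new ArenaAllocator(pageSize: 64 * 1024, pageCount: 8);

    for (int pass = 0; pass < 2; pass++)
    {
        Span<int> temps = arena.AllocateSpan<int>(128); // cheap bump allocation.
        temps.Fill(pass);

        arena.Reset(); // recycles pooled pages, frees oversized extra blocks.
    }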
diff --git a/src/ARMeilleure/Common/BitMap.cs b/src/ARMeilleure/Common/BitMap.cs
new file mode 100644
index 00000000..27ef031f
--- /dev/null
+++ b/src/ARMeilleure/Common/BitMap.cs
@@ -0,0 +1,222 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure.Common
+{
+ unsafe class BitMap : IEnumerable<int>, IDisposable
+ {
+ private const int IntSize = 64;
+ private const int IntMask = IntSize - 1;
+
+ private int _count;
+ private long* _masks;
+ private readonly Allocator _allocator;
+
+ public BitMap(Allocator allocator)
+ {
+ _allocator = allocator;
+ }
+
+ public BitMap(Allocator allocator, int capacity) : this(allocator)
+ {
+ EnsureCapacity(capacity);
+ }
+
+ public bool Set(int bit)
+ {
+ EnsureCapacity(bit + 1);
+
+ int wordIndex = bit / IntSize;
+ int wordBit = bit & IntMask;
+
+ long wordMask = 1L << wordBit;
+
+ if ((_masks[wordIndex] & wordMask) != 0)
+ {
+ return false;
+ }
+
+ _masks[wordIndex] |= wordMask;
+
+ return true;
+ }
+
+ public void Clear(int bit)
+ {
+ EnsureCapacity(bit + 1);
+
+ int wordIndex = bit / IntSize;
+ int wordBit = bit & IntMask;
+
+ long wordMask = 1L << wordBit;
+
+ _masks[wordIndex] &= ~wordMask;
+ }
+
+ public bool IsSet(int bit)
+ {
+ EnsureCapacity(bit + 1);
+
+ int wordIndex = bit / IntSize;
+ int wordBit = bit & IntMask;
+
+ return (_masks[wordIndex] & (1L << wordBit)) != 0;
+ }
+
+ public int FindFirstUnset()
+ {
+ for (int index = 0; index < _count; index++)
+ {
+ long mask = _masks[index];
+
+ if (mask != -1L)
+ {
+ return BitOperations.TrailingZeroCount(~mask) + index * IntSize;
+ }
+ }
+
+ return _count * IntSize;
+ }
+
+ public bool Set(BitMap map)
+ {
+ EnsureCapacity(map._count * IntSize);
+
+ bool modified = false;
+
+ for (int index = 0; index < _count; index++)
+ {
+ long newValue = _masks[index] | map._masks[index];
+
+ if (_masks[index] != newValue)
+ {
+ _masks[index] = newValue;
+
+ modified = true;
+ }
+ }
+
+ return modified;
+ }
+
+ public bool Clear(BitMap map)
+ {
+ EnsureCapacity(map._count * IntSize);
+
+ bool modified = false;
+
+ for (int index = 0; index < _count; index++)
+ {
+ long newValue = _masks[index] & ~map._masks[index];
+
+ if (_masks[index] != newValue)
+ {
+ _masks[index] = newValue;
+
+ modified = true;
+ }
+ }
+
+ return modified;
+ }
+
+ private void EnsureCapacity(int size)
+ {
+ int count = (size + IntMask) / IntSize;
+
+ if (count > _count)
+ {
+ var oldMask = _masks;
+ var oldSpan = new Span<long>(_masks, _count);
+
+ _masks = _allocator.Allocate<long>((uint)count);
+ _count = count;
+
+ var newSpan = new Span<long>(_masks, _count);
+
+ oldSpan.CopyTo(newSpan);
+ newSpan.Slice(oldSpan.Length).Clear();
+
+ _allocator.Free(oldMask);
+ }
+ }
+
+ public void Dispose()
+ {
+ if (_masks != null)
+ {
+ _allocator.Free(_masks);
+
+ _masks = null;
+ }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ IEnumerator<int> IEnumerable<int>.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ public Enumerator GetEnumerator()
+ {
+ return new Enumerator(this);
+ }
+
+ public struct Enumerator : IEnumerator<int>
+ {
+ private long _index;
+ private long _mask;
+ private int _bit;
+ private readonly BitMap _map;
+
+ public int Current => (int)_index * IntSize + _bit;
+ object IEnumerator.Current => Current;
+
+ public Enumerator(BitMap map)
+ {
+ _index = -1;
+ _mask = 0;
+ _bit = 0;
+ _map = map;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool MoveNext()
+ {
+ if (_mask != 0)
+ {
+ _mask &= ~(1L << _bit);
+ }
+
+ // Manually hoist these loads, because RyuJIT does not.
+ long count = (uint)_map._count;
+ long* masks = _map._masks;
+
+ while (_mask == 0)
+ {
+ if (++_index >= count)
+ {
+ return false;
+ }
+
+ _mask = masks[_index];
+ }
+
+ _bit = BitOperations.TrailingZeroCount(_mask);
+
+ return true;
+ }
+
+ public void Reset() { }
+
+ public void Dispose() { }
+ }
+ }
+} \ No newline at end of file
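
A small sketch of BitMap as a growable set of integers, assuming the shared NativeAllocator: Set grows the backing words on demand, and the struct enumerator yields set bit indices in ascending order without allocating.

    using var live = new BitMap(NativeAllocator.Instance);

    live.Set(3);
    live.Set(100); // grows the map to two 64-bit words.

    foreach (int bit in live)
    {
        // Visits 3, then 100.
    }

    int firstFree = live.FindFirstUnset(); // 0, the lowest unset bit.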
diff --git a/src/ARMeilleure/Common/BitUtils.cs b/src/ARMeilleure/Common/BitUtils.cs
new file mode 100644
index 00000000..e7697ff3
--- /dev/null
+++ b/src/ARMeilleure/Common/BitUtils.cs
@@ -0,0 +1,57 @@
+using System;
+using System.Numerics;
+
+namespace ARMeilleure.Common
+{
+ static class BitUtils
+ {
+ private static ReadOnlySpan<sbyte> HbsNibbleLut => new sbyte[] { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
+
+ public static long FillWithOnes(int bits)
+ {
+ return bits == 64 ? -1L : (1L << bits) - 1;
+ }
+
+ public static int HighestBitSet(int value)
+ {
+ return 31 - BitOperations.LeadingZeroCount((uint)value);
+ }
+
+ public static int HighestBitSetNibble(int value)
+ {
+ return HbsNibbleLut[value];
+ }
+
+ public static long Replicate(long bits, int size)
+ {
+ long output = 0;
+
+ for (int bit = 0; bit < 64; bit += size)
+ {
+ output |= bits << bit;
+ }
+
+ return output;
+ }
+
+ public static int RotateRight(int bits, int shift, int size)
+ {
+ return (int)RotateRight((uint)bits, shift, size);
+ }
+
+ public static uint RotateRight(uint bits, int shift, int size)
+ {
+ return (bits >> shift) | (bits << (size - shift));
+ }
+
+ public static long RotateRight(long bits, int shift, int size)
+ {
+ return (long)RotateRight((ulong)bits, shift, size);
+ }
+
+ public static ulong RotateRight(ulong bits, int shift, int size)
+ {
+ return (bits >> shift) | (bits << (size - shift));
+ }
+ }
+}
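
These helpers mainly serve the logical-immediate decoder (DecodeBitMask in DecoderHelper, further below), which builds a run of ones, rotates it within an element, and tiles the element across 64 bits. A worked example with hand-picked values:

    long run = BitUtils.FillWithOnes(3);              // 0b0000_0111
    long rot = BitUtils.RotateRight(run, 1, 8)
             & BitUtils.FillWithOnes(8);              // 0b1000_0011
    long mask = BitUtils.Replicate(rot, 8);           // 0x8383_8383_8383_8383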
diff --git a/src/ARMeilleure/Common/Counter.cs b/src/ARMeilleure/Common/Counter.cs
new file mode 100644
index 00000000..d7210d15
--- /dev/null
+++ b/src/ARMeilleure/Common/Counter.cs
@@ -0,0 +1,98 @@
+using System;
+
+namespace ARMeilleure.Common
+{
+ /// <summary>
+ /// Represents a numeric counter which can be used for instrumentation of compiled code.
+ /// </summary>
+ /// <typeparam name="T">Type of the counter</typeparam>
+ class Counter<T> : IDisposable where T : unmanaged
+ {
+ private bool _disposed;
+
+ /// <summary>
+ /// Index in the <see cref="EntryTable{T}"/>
+ /// </summary>
+ private readonly int _index;
+ private readonly EntryTable<T> _countTable;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Counter{T}"/> class from the specified
+ /// <see cref="EntryTable{T}"/> instance and index.
+ /// </summary>
+ /// <param name="countTable"><see cref="EntryTable{T}"/> instance</param>
+ /// <exception cref="ArgumentNullException"><paramref name="countTable"/> is <see langword="null"/></exception>
+ /// <exception cref="ArgumentException"><typeparamref name="T"/> is unsupported</exception>
+ public Counter(EntryTable<T> countTable)
+ {
+ if (typeof(T) != typeof(byte) && typeof(T) != typeof(sbyte) &&
+ typeof(T) != typeof(short) && typeof(T) != typeof(ushort) &&
+ typeof(T) != typeof(int) && typeof(T) != typeof(uint) &&
+ typeof(T) != typeof(long) && typeof(T) != typeof(ulong) &&
+ typeof(T) != typeof(nint) && typeof(T) != typeof(nuint) &&
+ typeof(T) != typeof(float) && typeof(T) != typeof(double))
+ {
+ throw new ArgumentException("Counter does not support the specified type.");
+ }
+
+ _countTable = countTable ?? throw new ArgumentNullException(nameof(countTable));
+ _index = countTable.Allocate();
+ }
+
+ /// <summary>
+ /// Gets a reference to the value of the counter.
+ /// </summary>
+ /// <exception cref="ObjectDisposedException"><see cref="Counter{T}"/> instance was disposed</exception>
+ /// <remarks>
+ /// This can refer to freed memory if the owning <see cref="EntryTable{TEntry}"/> is disposed.
+ /// </remarks>
+ public ref T Value
+ {
+ get
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ return ref _countTable.GetValue(_index);
+ }
+ }
+
+ /// <summary>
+ /// Releases all resources used by the <see cref="Counter{T}"/> instance.
+ /// </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+
+ /// <summary>
+ /// Releases all unmanaged and optionally managed resources used by the <see cref="Counter{T}"/> instance.
+ /// </summary>
+ /// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resources</param>
+ protected virtual void Dispose(bool disposing)
+ {
+ if (!_disposed)
+ {
+ try
+ {
+ // The index into the EntryTable is essentially an unmanaged resource since we allocate and free the
+ // resource ourselves.
+ _countTable.Free(_index);
+ }
+ catch (ObjectDisposedException)
+ {
+ // Can happen because _countTable may be disposed before the Counter instance.
+ }
+
+ _disposed = true;
+ }
+ }
+
+ /// <summary>
+ /// Frees resources used by the <see cref="Counter{T}"/> instance.
+ /// </summary>
+ ~Counter()
+ {
+ Dispose(false);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Common/EntryTable.cs b/src/ARMeilleure/Common/EntryTable.cs
new file mode 100644
index 00000000..6f205797
--- /dev/null
+++ b/src/ARMeilleure/Common/EntryTable.cs
@@ -0,0 +1,188 @@
+using System;
+using System.Collections.Generic;
+using System.Numerics;
+
+namespace ARMeilleure.Common
+{
+ /// <summary>
+ /// Represents an expandable table of the type <typeparamref name="TEntry"/>, whose entries will remain at the same
+ /// address throughout the table's lifetime.
+ /// </summary>
+ /// <typeparam name="TEntry">Type of the entry in the table</typeparam>
+ class EntryTable<TEntry> : IDisposable where TEntry : unmanaged
+ {
+ private bool _disposed;
+ private int _freeHint;
+ private readonly int _pageCapacity; // Number of entries per page.
+ private readonly int _pageLogCapacity;
+ private readonly Dictionary<int, IntPtr> _pages;
+ private readonly BitMap _allocated;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="EntryTable{TEntry}"/> class with the desired page size in
+ /// bytes.
+ /// </summary>
+ /// <param name="pageSize">Desired page size in bytes</param>
+ /// <exception cref="ArgumentOutOfRangeException"><paramref name="pageSize"/> is less than 0</exception>
+ /// <exception cref="ArgumentException"><typeparamref name="TEntry"/>'s size is zero</exception>
+ /// <remarks>
+ /// The actual page size may be smaller or larger depending on the size of <typeparamref name="TEntry"/>.
+ /// </remarks>
+ public unsafe EntryTable(int pageSize = 4096)
+ {
+ if (pageSize < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(pageSize), "Page size cannot be negative.");
+ }
+
+ if (sizeof(TEntry) == 0)
+ {
+ throw new ArgumentException("Size of TEntry cannot be zero.");
+ }
+
+ _allocated = new BitMap(NativeAllocator.Instance);
+ _pages = new Dictionary<int, IntPtr>();
+ _pageLogCapacity = BitOperations.Log2((uint)(pageSize / sizeof(TEntry)));
+ _pageCapacity = 1 << _pageLogCapacity;
+ }
+
+ /// <summary>
+ /// Allocates an entry in the <see cref="EntryTable{TEntry}"/>.
+ /// </summary>
+ /// <returns>Index of entry allocated in the table</returns>
+ /// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
+ public int Allocate()
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ lock (_allocated)
+ {
+ if (_allocated.IsSet(_freeHint))
+ {
+ _freeHint = _allocated.FindFirstUnset();
+ }
+
+ int index = _freeHint++;
+ var page = GetPage(index);
+
+ _allocated.Set(index);
+
+ GetValue(page, index) = default;
+
+ return index;
+ }
+ }
+
+ /// <summary>
+ /// Frees the entry at the specified <paramref name="index"/>.
+ /// </summary>
+ /// <param name="index">Index of entry to free</param>
+ /// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
+ public void Free(int index)
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ lock (_allocated)
+ {
+ if (_allocated.IsSet(index))
+ {
+ _allocated.Clear(index);
+
+ _freeHint = index;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Gets a reference to the entry at the specified allocated <paramref name="index"/>.
+ /// </summary>
+ /// <param name="index">Index of the entry</param>
+ /// <returns>Reference to the entry at the specified <paramref name="index"/></returns>
+ /// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
+ /// <exception cref="ArgumentException">Entry at <paramref name="index"/> is not allocated</exception>
+ public ref TEntry GetValue(int index)
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ lock (_allocated)
+ {
+ if (!_allocated.IsSet(index))
+ {
+ throw new ArgumentException("Entry at the specified index was not allocated", nameof(index));
+ }
+
+ var page = GetPage(index);
+
+ return ref GetValue(page, index);
+ }
+ }
+
+ /// <summary>
+ /// Gets a reference to the entry using the specified <paramref name="index"/> from the specified
+ /// <paramref name="page"/>.
+ /// </summary>
+ /// <param name="page">Page to use</param>
+ /// <param name="index">Index to use</param>
+ /// <returns>Reference to the entry</returns>
+ private ref TEntry GetValue(Span<TEntry> page, int index)
+ {
+ return ref page[index & (_pageCapacity - 1)];
+ }
+
+ /// <summary>
+ /// Gets the page for the specified <paramref name="index"/>.
+ /// </summary>
+ /// <param name="index">Index to use</param>
+ /// <returns>Page for the specified <paramref name="index"/></returns>
+ private unsafe Span<TEntry> GetPage(int index)
+ {
+ var pageIndex = (int)((uint)(index & ~(_pageCapacity - 1)) >> _pageLogCapacity);
+
+ if (!_pages.TryGetValue(pageIndex, out IntPtr page))
+ {
+ page = (IntPtr)NativeAllocator.Instance.Allocate((uint)sizeof(TEntry) * (uint)_pageCapacity);
+
+ _pages.Add(pageIndex, page);
+ }
+
+ return new Span<TEntry>((void*)page, _pageCapacity);
+ }
+
+ /// <summary>
+ /// Releases all resources used by the <see cref="EntryTable{TEntry}"/> instance.
+ /// </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+
+ /// <summary>
+ /// Releases all unmanaged and optionally managed resources used by the <see cref="EntryTable{TEntry}"/>
+ /// instance.
+ /// </summary>
+ /// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resources</param>
+ protected unsafe virtual void Dispose(bool disposing)
+ {
+ if (!_disposed)
+ {
+ _allocated.Dispose();
+
+ foreach (var page in _pages.Values)
+ {
+ NativeAllocator.Instance.Free((void*)page);
+ }
+
+ _disposed = true;
+ }
+ }
+
+ /// <summary>
+ /// Frees resources used by the <see cref="EntryTable{TEntry}"/> instance.
+ /// </summary>
+ ~EntryTable()
+ {
+ Dispose(false);
+ }
+ }
+}
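
A hedged sketch of EntryTable paired with the Counter type above: the table hands out slots at stable addresses, so compiled code can keep a raw pointer to its counter while the managed side reads it through the wrapper. The uint payload is illustrative.

    using var hitCounts = new EntryTable<uint>(pageSize: 4096);
    using var counter = new Counter<uint>(hitCounts);

    counter.Value++;           // instrumented code would bump this slot.
    uint hits = counter.Value; // the managed side reads the same entry: 1.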
diff --git a/src/ARMeilleure/Common/EnumUtils.cs b/src/ARMeilleure/Common/EnumUtils.cs
new file mode 100644
index 00000000..2a4aa645
--- /dev/null
+++ b/src/ARMeilleure/Common/EnumUtils.cs
@@ -0,0 +1,12 @@
+using System;
+
+namespace ARMeilleure.Common
+{
+ static class EnumUtils
+ {
+ public static int GetCount(Type enumType)
+ {
+ return Enum.GetNames(enumType).Length;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Common/NativeAllocator.cs b/src/ARMeilleure/Common/NativeAllocator.cs
new file mode 100644
index 00000000..71c04a9b
--- /dev/null
+++ b/src/ARMeilleure/Common/NativeAllocator.cs
@@ -0,0 +1,27 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Common
+{
+ unsafe sealed class NativeAllocator : Allocator
+ {
+ public static NativeAllocator Instance { get; } = new();
+
+ public override void* Allocate(ulong size)
+ {
+ void* result = (void*)Marshal.AllocHGlobal((IntPtr)size);
+
+ if (result == null)
+ {
+ throw new OutOfMemoryException();
+ }
+
+ return result;
+ }
+
+ public override void Free(void* block)
+ {
+ Marshal.FreeHGlobal((IntPtr)block);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/Block.cs b/src/ARMeilleure/Decoders/Block.cs
new file mode 100644
index 00000000..f296d299
--- /dev/null
+++ b/src/ARMeilleure/Decoders/Block.cs
@@ -0,0 +1,101 @@
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Decoders
+{
+ class Block
+ {
+ public ulong Address { get; set; }
+ public ulong EndAddress { get; set; }
+
+ public Block Next { get; set; }
+ public Block Branch { get; set; }
+
+ public bool Exit { get; set; }
+
+ public List<OpCode> OpCodes { get; }
+
+ public Block()
+ {
+ OpCodes = new List<OpCode>();
+ }
+
+ public Block(ulong address) : this()
+ {
+ Address = address;
+ }
+
+ public void Split(Block rightBlock)
+ {
+ int splitIndex = BinarySearch(OpCodes, rightBlock.Address);
+
+ if (OpCodes[splitIndex].Address < rightBlock.Address)
+ {
+ splitIndex++;
+ }
+
+ int splitCount = OpCodes.Count - splitIndex;
+
+ if (splitCount <= 0)
+ {
+ throw new ArgumentException("Can't split at right block address.");
+ }
+
+ rightBlock.EndAddress = EndAddress;
+
+ rightBlock.Next = Next;
+ rightBlock.Branch = Branch;
+
+ rightBlock.OpCodes.AddRange(OpCodes.GetRange(splitIndex, splitCount));
+
+ EndAddress = rightBlock.Address;
+
+ Next = rightBlock;
+ Branch = null;
+
+ OpCodes.RemoveRange(splitIndex, splitCount);
+ }
+
+ private static int BinarySearch(List<OpCode> opCodes, ulong address)
+ {
+ int left = 0;
+ int middle = 0;
+ int right = opCodes.Count - 1;
+
+ while (left <= right)
+ {
+ int size = right - left;
+
+ middle = left + (size >> 1);
+
+ OpCode opCode = opCodes[middle];
+
+ if (address == (ulong)opCode.Address)
+ {
+ break;
+ }
+
+ if (address < (ulong)opCode.Address)
+ {
+ right = middle - 1;
+ }
+ else
+ {
+ left = middle + 1;
+ }
+ }
+
+ return middle;
+ }
+
+ public OpCode GetLastOp()
+ {
+ if (OpCodes.Count > 0)
+ {
+ return OpCodes[OpCodes.Count - 1];
+ }
+
+ return null;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/Condition.cs b/src/ARMeilleure/Decoders/Condition.cs
new file mode 100644
index 00000000..727f897d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/Condition.cs
@@ -0,0 +1,32 @@
+namespace ARMeilleure.Decoders
+{
+ enum Condition
+ {
+ Eq = 0,
+ Ne = 1,
+ GeUn = 2,
+ LtUn = 3,
+ Mi = 4,
+ Pl = 5,
+ Vs = 6,
+ Vc = 7,
+ GtUn = 8,
+ LeUn = 9,
+ Ge = 10,
+ Lt = 11,
+ Gt = 12,
+ Le = 13,
+ Al = 14,
+ Nv = 15
+ }
+
+ static class ConditionExtensions
+ {
+ public static Condition Invert(this Condition cond)
+ {
+ // Bit 0 of each condition code acts as a negation bit, so
+ // inverting this bit has the effect of inverting the condition.
+ return (Condition)((int)cond ^ 1);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/DataOp.cs b/src/ARMeilleure/Decoders/DataOp.cs
new file mode 100644
index 00000000..464d0089
--- /dev/null
+++ b/src/ARMeilleure/Decoders/DataOp.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ enum DataOp
+ {
+ Adr = 0,
+ Arithmetic = 1,
+ Logical = 2,
+ BitField = 3
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/Decoder.cs b/src/ARMeilleure/Decoders/Decoder.cs
new file mode 100644
index 00000000..426465aa
--- /dev/null
+++ b/src/ARMeilleure/Decoders/Decoder.cs
@@ -0,0 +1,391 @@
+using ARMeilleure.Decoders.Optimizations;
+using ARMeilleure.Instructions;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace ARMeilleure.Decoders
+{
+ static class Decoder
+ {
+ // We define a limit on the number of instructions that a function may have;
+ // this prevents functions from growing too large, which would take too long
+ // to compile and use too much memory.
+ private const int MaxInstsPerFunction = 2500;
+
+ // For lower code quality translations, we set a lower limit, since translation blocks execution in that case.
+ private const int MaxInstsPerFunctionLowCq = 500;
+
+ public static Block[] Decode(IMemoryManager memory, ulong address, ExecutionMode mode, bool highCq, DecoderMode dMode)
+ {
+ List<Block> blocks = new List<Block>();
+
+ Queue<Block> workQueue = new Queue<Block>();
+
+ Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();
+
+ Debug.Assert(MaxInstsPerFunctionLowCq <= MaxInstsPerFunction);
+
+ int opsCount = 0;
+
+ int instructionLimit = highCq ? MaxInstsPerFunction : MaxInstsPerFunctionLowCq;
+
+ Block GetBlock(ulong blkAddress)
+ {
+ if (!visited.TryGetValue(blkAddress, out Block block))
+ {
+ block = new Block(blkAddress);
+
+ if ((dMode != DecoderMode.MultipleBlocks && visited.Count >= 1) || opsCount > instructionLimit || !memory.IsMapped(blkAddress))
+ {
+ block.Exit = true;
+ block.EndAddress = blkAddress;
+ }
+
+ workQueue.Enqueue(block);
+
+ visited.Add(blkAddress, block);
+ }
+
+ return block;
+ }
+
+ GetBlock(address);
+
+ while (workQueue.TryDequeue(out Block currBlock))
+ {
+ // Check if the current block is inside another block.
+ if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ if (nBlock.Address == currBlock.Address)
+ {
+ throw new InvalidOperationException("Found duplicate block address on the list.");
+ }
+
+ currBlock.Exit = false;
+
+ nBlock.Split(currBlock);
+
+ blocks.Insert(nBlkIndex + 1, currBlock);
+
+ continue;
+ }
+
+ if (!currBlock.Exit)
+ {
+ // If we have a block after the current one, set the limit address.
+ ulong limitAddress = ulong.MaxValue;
+
+ if (nBlkIndex != blocks.Count)
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ int nextIndex = nBlkIndex + 1;
+
+ if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
+ {
+ limitAddress = blocks[nextIndex].Address;
+ }
+ else if (nBlock.Address > currBlock.Address)
+ {
+ limitAddress = blocks[nBlkIndex].Address;
+ }
+ }
+
+ if (dMode == DecoderMode.SingleInstruction)
+ {
+ // Only read at most one instruction
+ limitAddress = currBlock.Address + 1;
+ }
+
+ FillBlock(memory, mode, currBlock, limitAddress);
+
+ opsCount += currBlock.OpCodes.Count;
+
+ if (currBlock.OpCodes.Count != 0)
+ {
+ // Set child blocks. "Branch" is the block the branch instruction
+ // points to (when taken), "Next" is the block at the next address,
+ // executed when the branch is not taken. For Unconditional Branches
+ // (except BL/BLR, which are subroutine calls) or end of executable, Next is null.
+ OpCode lastOp = currBlock.GetLastOp();
+
+ bool isCall = IsCall(lastOp);
+
+ if (lastOp is IOpCodeBImm op && !isCall)
+ {
+ currBlock.Branch = GetBlock((ulong)op.Immediate);
+ }
+
+ if (isCall || !(IsUnconditionalBranch(lastOp) || IsTrap(lastOp)))
+ {
+ currBlock.Next = GetBlock(currBlock.EndAddress);
+ }
+ }
+ }
+
+ // Insert the new block on the list (sorted by address).
+ if (blocks.Count != 0)
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
+ }
+ else
+ {
+ blocks.Add(currBlock);
+ }
+ }
+
+ if (blocks.Count == 1 && blocks[0].OpCodes.Count == 0)
+ {
+ Debug.Assert(blocks[0].Exit);
+ Debug.Assert(blocks[0].Address == blocks[0].EndAddress);
+
+ throw new InvalidOperationException($"Decoded a single empty exit block. Entry point = 0x{address:X}.");
+ }
+
+ if (dMode == DecoderMode.MultipleBlocks)
+ {
+ return TailCallRemover.RunPass(address, blocks);
+ }
+ else
+ {
+ return blocks.ToArray();
+ }
+ }
+
+ public static bool BinarySearch(List<Block> blocks, ulong address, out int index)
+ {
+ index = 0;
+
+ int left = 0;
+ int right = blocks.Count - 1;
+
+ while (left <= right)
+ {
+ int size = right - left;
+
+ int middle = left + (size >> 1);
+
+ Block block = blocks[middle];
+
+ index = middle;
+
+ if (address >= block.Address && address < block.EndAddress)
+ {
+ return true;
+ }
+
+ if (address < block.Address)
+ {
+ right = middle - 1;
+ }
+ else
+ {
+ left = middle + 1;
+ }
+ }
+
+ return false;
+ }
+
+ private static void FillBlock(
+ IMemoryManager memory,
+ ExecutionMode mode,
+ Block block,
+ ulong limitAddress)
+ {
+ ulong address = block.Address;
+ int itBlockSize = 0;
+
+ OpCode opCode;
+
+ do
+ {
+ if (address >= limitAddress && itBlockSize == 0)
+ {
+ break;
+ }
+
+ opCode = DecodeOpCode(memory, address, mode);
+
+ block.OpCodes.Add(opCode);
+
+ address += (ulong)opCode.OpCodeSizeInBytes;
+
+ if (opCode is OpCodeT16IfThen it)
+ {
+ itBlockSize = it.IfThenBlockSize;
+ }
+ else if (itBlockSize > 0)
+ {
+ itBlockSize--;
+ }
+ }
+ while (!(IsBranch(opCode) || IsException(opCode)));
+
+ block.EndAddress = address;
+ }
+
+ private static bool IsBranch(OpCode opCode)
+ {
+ return opCode is OpCodeBImm ||
+ opCode is OpCodeBReg || IsAarch32Branch(opCode);
+ }
+
+ private static bool IsUnconditionalBranch(OpCode opCode)
+ {
+ return opCode is OpCodeBImmAl ||
+ opCode is OpCodeBReg || IsAarch32UnconditionalBranch(opCode);
+ }
+
+ private static bool IsAarch32UnconditionalBranch(OpCode opCode)
+ {
+ if (!(opCode is OpCode32 op))
+ {
+ return false;
+ }
+
+ // Compare and branch instructions are always conditional.
+ if (opCode.Instruction.Name == InstName.Cbz ||
+ opCode.Instruction.Name == InstName.Cbnz)
+ {
+ return false;
+ }
+
+ // Note: On ARM32, most instructions have conditional execution,
+ // so there's no "Always" (unconditional) branch like on ARM64.
+ // We need to check if the condition is "Always" instead.
+ return IsAarch32Branch(op) && op.Cond >= Condition.Al;
+ }
+
+ private static bool IsAarch32Branch(OpCode opCode)
+ {
+ // Note: On ARM32, most ALU operations can write to R15 (PC),
+ // so we must consider such operations as potential branches as well.
+ if (opCode is IOpCode32Alu opAlu && opAlu.Rd == RegisterAlias.Aarch32Pc)
+ {
+ if (opCode is OpCodeT32)
+ {
+ return opCode.Instruction.Name != InstName.Tst && opCode.Instruction.Name != InstName.Teq &&
+ opCode.Instruction.Name != InstName.Cmp && opCode.Instruction.Name != InstName.Cmn;
+ }
+ return true;
+ }
+
+ // Same thing for memory operations. We have the cases where PC is a target
+ // register (Rt == 15 or (mask & (1 << 15)) != 0), and cases where there is
+ // a write back to PC (wback == true && Rn == 15); however, the latter may
+ // be "undefined" depending on the CPU, so compilers should not produce it.
+ if (opCode is IOpCode32Mem || opCode is IOpCode32MemMult)
+ {
+ int rt, rn;
+
+ bool wBack, isLoad;
+
+ if (opCode is IOpCode32Mem opMem)
+ {
+ rt = opMem.Rt;
+ rn = opMem.Rn;
+ wBack = opMem.WBack;
+ isLoad = opMem.IsLoad;
+
+ // For the dual load, we also need to take into account the
+ // case where Rt2 == 15 (PC).
+ if (rt == 14 && opMem.Instruction.Name == InstName.Ldrd)
+ {
+ rt = RegisterAlias.Aarch32Pc;
+ }
+ }
+ else if (opCode is IOpCode32MemMult opMemMult)
+ {
+ const int pcMask = 1 << RegisterAlias.Aarch32Pc;
+
+ rt = (opMemMult.RegisterMask & pcMask) != 0 ? RegisterAlias.Aarch32Pc : 0;
+ rn = opMemMult.Rn;
+ wBack = opMemMult.PostOffset != 0;
+ isLoad = opMemMult.IsLoad;
+ }
+ else
+ {
+ throw new NotImplementedException($"The type \"{opCode.GetType().Name}\" is not implemented on the decoder.");
+ }
+
+ if ((rt == RegisterAlias.Aarch32Pc && isLoad) ||
+ (rn == RegisterAlias.Aarch32Pc && wBack))
+ {
+ return true;
+ }
+ }
+
+ // Explicit branch instructions.
+ return opCode is IOpCode32BImm ||
+ opCode is IOpCode32BReg;
+ }
+
+ private static bool IsCall(OpCode opCode)
+ {
+ return opCode.Instruction.Name == InstName.Bl ||
+ opCode.Instruction.Name == InstName.Blr ||
+ opCode.Instruction.Name == InstName.Blx;
+ }
+
+ private static bool IsException(OpCode opCode)
+ {
+ return IsTrap(opCode) || opCode.Instruction.Name == InstName.Svc;
+ }
+
+ private static bool IsTrap(OpCode opCode)
+ {
+ return opCode.Instruction.Name == InstName.Brk ||
+ opCode.Instruction.Name == InstName.Trap ||
+ opCode.Instruction.Name == InstName.Und;
+ }
+
+ public static OpCode DecodeOpCode(IMemoryManager memory, ulong address, ExecutionMode mode)
+ {
+ int opCode = memory.Read<int>(address);
+
+ InstDescriptor inst;
+
+ OpCodeTable.MakeOp makeOp;
+
+ if (mode == ExecutionMode.Aarch64)
+ {
+ (inst, makeOp) = OpCodeTable.GetInstA64(opCode);
+ }
+ else
+ {
+ if (mode == ExecutionMode.Aarch32Arm)
+ {
+ (inst, makeOp) = OpCodeTable.GetInstA32(opCode);
+ }
+ else /* if (mode == ExecutionMode.Aarch32Thumb) */
+ {
+ (inst, makeOp) = OpCodeTable.GetInstT32(opCode);
+ }
+ }
+
+ if (makeOp != null)
+ {
+ return makeOp(inst, address, opCode);
+ }
+ else
+ {
+ if (mode == ExecutionMode.Aarch32Thumb)
+ {
+ return new OpCodeT16(inst, address, opCode);
+ }
+ else
+ {
+ return new OpCode(inst, address, opCode);
+ }
+ }
+ }
+ }
+} \ No newline at end of file
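
The decoder keeps its block list sorted by address and repeatedly asks whether a new target address falls inside an already-decoded block, splitting that block if so. A reduced model of that interval search, using a hypothetical (Address, EndAddress) tuple in place of Block (assumes using System.Collections.Generic):

    static bool FindContaining(List<(ulong Address, ulong EndAddress)> blocks, ulong address, out int index)
    {
        int left = 0, right = blocks.Count - 1;
        index = 0;

        while (left <= right)
        {
            int middle = left + ((right - left) >> 1);
            index = middle;

            if (address >= blocks[middle].Address && address < blocks[middle].EndAddress)
            {
                return true; // address lands inside an existing block.
            }

            if (address < blocks[middle].Address) right = middle - 1;
            else left = middle + 1;
        }

        return false; // index is left near the insertion point for a new block.
    }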
diff --git a/src/ARMeilleure/Decoders/DecoderHelper.cs b/src/ARMeilleure/Decoders/DecoderHelper.cs
new file mode 100644
index 00000000..5227e6a1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/DecoderHelper.cs
@@ -0,0 +1,167 @@
+using ARMeilleure.Common;
+
+namespace ARMeilleure.Decoders
+{
+ static class DecoderHelper
+ {
+ static DecoderHelper()
+ {
+ Imm8ToFP32Table = BuildImm8ToFP32Table();
+ Imm8ToFP64Table = BuildImm8ToFP64Table();
+ }
+
+ public static readonly uint[] Imm8ToFP32Table;
+ public static readonly ulong[] Imm8ToFP64Table;
+
+ private static uint[] BuildImm8ToFP32Table()
+ {
+ uint[] tbl = new uint[256];
+
+ for (int idx = 0; idx < tbl.Length; idx++)
+ {
+ tbl[idx] = ExpandImm8ToFP32((uint)idx);
+ }
+
+ return tbl;
+ }
+
+ private static ulong[] BuildImm8ToFP64Table()
+ {
+ ulong[] tbl = new ulong[256];
+
+ for (int idx = 0; idx < tbl.Length; idx++)
+ {
+ tbl[idx] = ExpandImm8ToFP64((ulong)idx);
+ }
+
+ return tbl;
+ }
+
+ // abcdefgh -> aBbbbbbc defgh000 00000000 00000000 (B = ~b)
+ private static uint ExpandImm8ToFP32(uint imm)
+ {
+ uint MoveBit(uint bits, int from, int to)
+ {
+ return ((bits >> from) & 1U) << to;
+ }
+
+ return MoveBit(imm, 7, 31) | MoveBit(~imm, 6, 30) |
+ MoveBit(imm, 6, 29) | MoveBit( imm, 6, 28) |
+ MoveBit(imm, 6, 27) | MoveBit( imm, 6, 26) |
+ MoveBit(imm, 6, 25) | MoveBit( imm, 5, 24) |
+ MoveBit(imm, 4, 23) | MoveBit( imm, 3, 22) |
+ MoveBit(imm, 2, 21) | MoveBit( imm, 1, 20) |
+ MoveBit(imm, 0, 19);
+ }
+
+ // abcdefgh -> aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000 (B = ~b)
+ private static ulong ExpandImm8ToFP64(ulong imm)
+ {
+ ulong MoveBit(ulong bits, int from, int to)
+ {
+ return ((bits >> from) & 1UL) << to;
+ }
+
+ return MoveBit(imm, 7, 63) | MoveBit(~imm, 6, 62) |
+ MoveBit(imm, 6, 61) | MoveBit( imm, 6, 60) |
+ MoveBit(imm, 6, 59) | MoveBit( imm, 6, 58) |
+ MoveBit(imm, 6, 57) | MoveBit( imm, 6, 56) |
+ MoveBit(imm, 6, 55) | MoveBit( imm, 6, 54) |
+ MoveBit(imm, 5, 53) | MoveBit( imm, 4, 52) |
+ MoveBit(imm, 3, 51) | MoveBit( imm, 2, 50) |
+ MoveBit(imm, 1, 49) | MoveBit( imm, 0, 48);
+ }
+
+ public struct BitMask
+ {
+ public long WMask;
+ public long TMask;
+ public int Pos;
+ public int Shift;
+ public bool IsUndefined;
+
+ public static BitMask Invalid => new BitMask { IsUndefined = true };
+ }
+
+ public static BitMask DecodeBitMask(int opCode, bool immediate)
+ {
+ int immS = (opCode >> 10) & 0x3f;
+ int immR = (opCode >> 16) & 0x3f;
+
+ int n = (opCode >> 22) & 1;
+ int sf = (opCode >> 31) & 1;
+
+ int length = BitUtils.HighestBitSet((~immS & 0x3f) | (n << 6));
+
+ if (length < 1 || (sf == 0 && n != 0))
+ {
+ return BitMask.Invalid;
+ }
+
+ int size = 1 << length;
+
+ int levels = size - 1;
+
+ int s = immS & levels;
+ int r = immR & levels;
+
+ if (immediate && s == levels)
+ {
+ return BitMask.Invalid;
+ }
+
+ long wMask = BitUtils.FillWithOnes(s + 1);
+ long tMask = BitUtils.FillWithOnes(((s - r) & levels) + 1);
+
+ if (r > 0)
+ {
+ wMask = BitUtils.RotateRight(wMask, r, size);
+ wMask &= BitUtils.FillWithOnes(size);
+ }
+
+ return new BitMask()
+ {
+ WMask = BitUtils.Replicate(wMask, size),
+ TMask = BitUtils.Replicate(tMask, size),
+
+ Pos = immS,
+ Shift = immR
+ };
+ }
+
+ public static long DecodeImm24_2(int opCode)
+ {
+ return ((long)opCode << 40) >> 38;
+ }
+
+ public static long DecodeImm26_2(int opCode)
+ {
+ return ((long)opCode << 38) >> 36;
+ }
+
+ public static long DecodeImmS19_2(int opCode)
+ {
+ return (((long)opCode << 40) >> 43) & ~3;
+ }
+
+ public static long DecodeImmS14_2(int opCode)
+ {
+ return (((long)opCode << 45) >> 48) & ~3;
+ }
+
+ public static bool VectorArgumentsInvalid(bool q, params int[] args)
+ {
+ if (q)
+ {
+ for (int i = 0; i < args.Length; i++)
+ {
+ if ((args[i] & 1) == 1)
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+ }
+} \ No newline at end of file
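
A worked check of the imm8 expansion tables: the VFP/SIMD modified immediate 0x70 encodes 1.0f and 0x00 encodes 2.0f, since bit 6 (b) selects the top exponent bits and defgh become the high mantissa bits.

    uint bits1 = DecoderHelper.Imm8ToFP32Table[0x70]; // 0x3F800000
    uint bits2 = DecoderHelper.Imm8ToFP32Table[0x00]; // 0x40000000

    float one = BitConverter.Int32BitsToSingle((int)bits1); // 1.0f
    float two = BitConverter.Int32BitsToSingle((int)bits2); // 2.0f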
diff --git a/src/ARMeilleure/Decoders/DecoderMode.cs b/src/ARMeilleure/Decoders/DecoderMode.cs
new file mode 100644
index 00000000..55362084
--- /dev/null
+++ b/src/ARMeilleure/Decoders/DecoderMode.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+ enum DecoderMode
+ {
+ MultipleBlocks,
+ SingleBlock,
+ SingleInstruction,
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode.cs b/src/ARMeilleure/Decoders/IOpCode.cs
new file mode 100644
index 00000000..37ba7a4c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode.cs
@@ -0,0 +1,17 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode
+ {
+ ulong Address { get; }
+
+ InstDescriptor Instruction { get; }
+
+ RegisterSize RegisterSize { get; }
+
+ int GetBitsCount();
+
+ OperandType GetOperandType();
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32.cs b/src/ARMeilleure/Decoders/IOpCode32.cs
new file mode 100644
index 00000000..126c1069
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32 : IOpCode
+ {
+ Condition Cond { get; }
+
+ uint GetPc();
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32Adr.cs b/src/ARMeilleure/Decoders/IOpCode32Adr.cs
new file mode 100644
index 00000000..40a4f526
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32Adr.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32Adr
+ {
+ int Rd { get; }
+
+ int Immediate { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32Alu.cs b/src/ARMeilleure/Decoders/IOpCode32Alu.cs
new file mode 100644
index 00000000..69fee164
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32Alu.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32Alu : IOpCode32, IOpCode32HasSetFlags
+ {
+ int Rd { get; }
+ int Rn { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluBf.cs b/src/ARMeilleure/Decoders/IOpCode32AluBf.cs
new file mode 100644
index 00000000..206c2965
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluBf.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluBf
+ {
+ int Rd { get; }
+ int Rn { get; }
+
+ int Msb { get; }
+ int Lsb { get; }
+
+ int SourceMask => (int)(0xFFFFFFFF >> (31 - Msb));
+ int DestMask => SourceMask & (int)(0xFFFFFFFF << Lsb);
+ }
+}
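
A worked instance of the default mask computation above, for a bitfield spanning bits [7:4] (Msb = 7, Lsb = 4):

    int msb = 7, lsb = 4;

    int sourceMask = (int)(0xFFFFFFFF >> (31 - msb));     // 0x000000FF
    int destMask = sourceMask & (int)(0xFFFFFFFF << lsb); // 0x000000F0

    // The source contributes its low 8 bits, of which bits 7..4 are
    // inserted into the destination.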
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluImm.cs b/src/ARMeilleure/Decoders/IOpCode32AluImm.cs
new file mode 100644
index 00000000..342fb8f6
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluImm.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluImm : IOpCode32Alu
+ {
+ int Immediate { get; }
+
+ bool IsRotated { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluImm16.cs b/src/ARMeilleure/Decoders/IOpCode32AluImm16.cs
new file mode 100644
index 00000000..cd128f65
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluImm16.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluImm16 : IOpCode32Alu
+ {
+ int Immediate { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluMla.cs b/src/ARMeilleure/Decoders/IOpCode32AluMla.cs
new file mode 100644
index 00000000..79b16425
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluMla.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluMla : IOpCode32AluReg
+ {
+ int Ra { get; }
+
+ bool NHigh { get; }
+ bool MHigh { get; }
+ bool R { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluReg.cs b/src/ARMeilleure/Decoders/IOpCode32AluReg.cs
new file mode 100644
index 00000000..1612cc5c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluReg.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluReg : IOpCode32Alu
+ {
+ int Rm { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluRsImm.cs b/src/ARMeilleure/Decoders/IOpCode32AluRsImm.cs
new file mode 100644
index 00000000..e899a659
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluRsImm.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluRsImm : IOpCode32Alu
+ {
+ int Rm { get; }
+ int Immediate { get; }
+
+ ShiftType ShiftType { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluRsReg.cs b/src/ARMeilleure/Decoders/IOpCode32AluRsReg.cs
new file mode 100644
index 00000000..879db059
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluRsReg.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluRsReg : IOpCode32Alu
+ {
+ int Rm { get; }
+ int Rs { get; }
+
+ ShiftType ShiftType { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluUmull.cs b/src/ARMeilleure/Decoders/IOpCode32AluUmull.cs
new file mode 100644
index 00000000..79d2bb9b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluUmull.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluUmull : IOpCode32, IOpCode32HasSetFlags
+ {
+ int RdLo { get; }
+ int RdHi { get; }
+ int Rn { get; }
+ int Rm { get; }
+
+ bool NHigh { get; }
+ bool MHigh { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluUx.cs b/src/ARMeilleure/Decoders/IOpCode32AluUx.cs
new file mode 100644
index 00000000..d03c7e21
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluUx.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluUx : IOpCode32AluReg
+ {
+ int RotateBits { get; }
+ bool Add { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32BImm.cs b/src/ARMeilleure/Decoders/IOpCode32BImm.cs
new file mode 100644
index 00000000..ec7db2c2
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32BImm.cs
@@ -0,0 +1,4 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32BImm : IOpCode32, IOpCodeBImm { }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32BReg.cs b/src/ARMeilleure/Decoders/IOpCode32BReg.cs
new file mode 100644
index 00000000..097ab427
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32BReg.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32BReg : IOpCode32
+ {
+ int Rm { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32Exception.cs b/src/ARMeilleure/Decoders/IOpCode32Exception.cs
new file mode 100644
index 00000000..8f0fb81a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32Exception.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32Exception
+ {
+ int Id { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32HasSetFlags.cs b/src/ARMeilleure/Decoders/IOpCode32HasSetFlags.cs
new file mode 100644
index 00000000..71ca6d19
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32HasSetFlags.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32HasSetFlags
+ {
+ bool? SetFlags { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32Mem.cs b/src/ARMeilleure/Decoders/IOpCode32Mem.cs
new file mode 100644
index 00000000..6664ddff
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32Mem.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32Mem : IOpCode32
+ {
+ int Rt { get; }
+ int Rt2 => Rt | 1;
+ int Rn { get; }
+
+ bool WBack { get; }
+ bool IsLoad { get; }
+ bool Index { get; }
+ bool Add { get; }
+
+ int Immediate { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32MemEx.cs b/src/ARMeilleure/Decoders/IOpCode32MemEx.cs
new file mode 100644
index 00000000..aca7200a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32MemEx.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32MemEx : IOpCode32Mem
+ {
+ int Rd { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32MemMult.cs b/src/ARMeilleure/Decoders/IOpCode32MemMult.cs
new file mode 100644
index 00000000..4b891bc1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32MemMult.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32MemMult : IOpCode32
+ {
+ int Rn { get; }
+
+ int RegisterMask { get; }
+
+ int PostOffset { get; }
+
+ bool IsLoad { get; }
+
+ int Offset { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32MemReg.cs b/src/ARMeilleure/Decoders/IOpCode32MemReg.cs
new file mode 100644
index 00000000..7fe1b022
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32MemReg.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32MemReg : IOpCode32Mem
+ {
+ int Rm { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32MemRsImm.cs b/src/ARMeilleure/Decoders/IOpCode32MemRsImm.cs
new file mode 100644
index 00000000..65b7ee0b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32MemRsImm.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32MemRsImm : IOpCode32Mem
+ {
+ int Rm { get; }
+ ShiftType ShiftType { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCode32Simd.cs b/src/ARMeilleure/Decoders/IOpCode32Simd.cs
new file mode 100644
index 00000000..687254d9
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32Simd.cs
@@ -0,0 +1,4 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32Simd : IOpCode32, IOpCodeSimd { }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32SimdImm.cs b/src/ARMeilleure/Decoders/IOpCode32SimdImm.cs
new file mode 100644
index 00000000..a0cb669c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32SimdImm.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32SimdImm : IOpCode32Simd
+ {
+ int Vd { get; }
+ long Immediate { get; }
+ int Elems { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCodeAlu.cs b/src/ARMeilleure/Decoders/IOpCodeAlu.cs
new file mode 100644
index 00000000..b8c28513
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeAlu.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeAlu : IOpCode
+ {
+ int Rd { get; }
+ int Rn { get; }
+
+ DataOp DataOp { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCodeAluImm.cs b/src/ARMeilleure/Decoders/IOpCodeAluImm.cs
new file mode 100644
index 00000000..02f4c997
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeAluImm.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeAluImm : IOpCodeAlu
+ {
+ long Immediate { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCodeAluRs.cs b/src/ARMeilleure/Decoders/IOpCodeAluRs.cs
new file mode 100644
index 00000000..22540b11
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeAluRs.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeAluRs : IOpCodeAlu
+ {
+ int Shift { get; }
+ int Rm { get; }
+
+ ShiftType ShiftType { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCodeAluRx.cs b/src/ARMeilleure/Decoders/IOpCodeAluRx.cs
new file mode 100644
index 00000000..9d16be78
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeAluRx.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeAluRx : IOpCodeAlu
+ {
+ int Shift { get; }
+ int Rm { get; }
+
+ IntType IntType { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCodeBImm.cs b/src/ARMeilleure/Decoders/IOpCodeBImm.cs
new file mode 100644
index 00000000..958bff28
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeBImm.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeBImm : IOpCode
+ {
+ long Immediate { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCodeCond.cs b/src/ARMeilleure/Decoders/IOpCodeCond.cs
new file mode 100644
index 00000000..9808f7c0
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeCond.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeCond : IOpCode
+ {
+ Condition Cond { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCodeLit.cs b/src/ARMeilleure/Decoders/IOpCodeLit.cs
new file mode 100644
index 00000000..74084a45
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeLit.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeLit : IOpCode
+ {
+ int Rt { get; }
+ long Immediate { get; }
+ int Size { get; }
+ bool Signed { get; }
+ bool Prefetch { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IOpCodeSimd.cs b/src/ARMeilleure/Decoders/IOpCodeSimd.cs
new file mode 100644
index 00000000..056ef045
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeSimd.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeSimd : IOpCode
+ {
+ int Size { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/InstDescriptor.cs b/src/ARMeilleure/Decoders/InstDescriptor.cs
new file mode 100644
index 00000000..577ff394
--- /dev/null
+++ b/src/ARMeilleure/Decoders/InstDescriptor.cs
@@ -0,0 +1,18 @@
+using ARMeilleure.Instructions;
+
+namespace ARMeilleure.Decoders
+{
+ readonly struct InstDescriptor
+ {
+ public static InstDescriptor Undefined => new InstDescriptor(InstName.Und, InstEmit.Und);
+
+ public InstName Name { get; }
+ public InstEmitter Emitter { get; }
+
+ public InstDescriptor(InstName name, InstEmitter emitter)
+ {
+ Name = name;
+ Emitter = emitter;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/InstEmitter.cs b/src/ARMeilleure/Decoders/InstEmitter.cs
new file mode 100644
index 00000000..a8b52656
--- /dev/null
+++ b/src/ARMeilleure/Decoders/InstEmitter.cs
@@ -0,0 +1,6 @@
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.Decoders
+{
+ delegate void InstEmitter(ArmEmitterContext context);
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/IntType.cs b/src/ARMeilleure/Decoders/IntType.cs
new file mode 100644
index 00000000..244e9680
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IntType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ enum IntType
+ {
+ UInt8 = 0,
+ UInt16 = 1,
+ UInt32 = 2,
+ UInt64 = 3,
+ Int8 = 4,
+ Int16 = 5,
+ Int32 = 6,
+ Int64 = 7
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCode.cs b/src/ARMeilleure/Decoders/OpCode.cs
new file mode 100644
index 00000000..f9aed792
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode.cs
@@ -0,0 +1,49 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode : IOpCode
+ {
+ public ulong Address { get; }
+ public int RawOpCode { get; }
+
+ public int OpCodeSizeInBytes { get; protected set; } = 4;
+
+ public InstDescriptor Instruction { get; protected set; }
+
+ public RegisterSize RegisterSize { get; protected set; }
+
+ public static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode(inst, address, opCode);
+
+ public OpCode(InstDescriptor inst, ulong address, int opCode)
+ {
+ Instruction = inst;
+ Address = address;
+ RawOpCode = opCode;
+
+ RegisterSize = RegisterSize.Int64;
+ }
+
+ public int GetPairsCount() => GetBitsCount() / 16;
+ public int GetBytesCount() => GetBitsCount() / 8;
+
+ public int GetBitsCount()
+ {
+ switch (RegisterSize)
+ {
+ case RegisterSize.Int32: return 32;
+ case RegisterSize.Int64: return 64;
+ case RegisterSize.Simd64: return 64;
+ case RegisterSize.Simd128: return 128;
+ }
+
+ throw new InvalidOperationException();
+ }
+
+ public OperandType GetOperandType()
+ {
+ return RegisterSize == RegisterSize.Int32 ? OperandType.I32 : OperandType.I64;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCode32.cs b/src/ARMeilleure/Decoders/OpCode32.cs
new file mode 100644
index 00000000..c2f14145
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32.cs
@@ -0,0 +1,34 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32 : OpCode
+ {
+ public Condition Cond { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32(inst, address, opCode);
+
+ public OpCode32(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ RegisterSize = RegisterSize.Int32;
+
+ Cond = (Condition)((uint)opCode >> 28);
+ }
+
+ public bool IsThumb { get; protected init; } = false;
+
+ public uint GetPc()
+ {
+ // Due to backwards compatibility and the legacy behavior of the ARMv4 CPU pipeline,
+ // the PC actually points 2 instructions ahead.
+ if (IsThumb)
+ {
+ // In Thumb mode, the PC is ahead by 4 regardless of whether the current
+ // instruction is 16 or 32 bits wide.
+ return (uint)Address + 4u;
+ }
+ else
+ {
+ return (uint)Address + 8u;
+ }
+ }
+ }
+} \ No newline at end of file
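
The PC read-ahead rule above is easy to check in isolation. A minimal standalone sketch (the demo class is illustrative, not part of the codebase):

    using System;

    static class PcReadAheadDemo
    {
        // Mirrors OpCode32.GetPc(): in ARM mode the PC reads as address + 8,
        // in Thumb mode as address + 4, regardless of instruction width.
        static uint GetPc(uint address, bool isThumb) => address + (isThumb ? 4u : 8u);

        static void Main()
        {
            Console.WriteLine($"0x{GetPc(0x1000, isThumb: false):X}"); // 0x1008
            Console.WriteLine($"0x{GetPc(0x1000, isThumb: true):X}");  // 0x1004
        }
    }
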
diff --git a/src/ARMeilleure/Decoders/OpCode32Alu.cs b/src/ARMeilleure/Decoders/OpCode32Alu.cs
new file mode 100644
index 00000000..1625aee0
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Alu.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Alu : OpCode32, IOpCode32Alu
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Alu(inst, address, opCode);
+
+ public OpCode32Alu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ SetFlags = ((opCode >> 20) & 1) != 0;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCode32AluBf.cs b/src/ARMeilleure/Decoders/OpCode32AluBf.cs
new file mode 100644
index 00000000..0cee34e6
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluBf.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluBf : OpCode32, IOpCode32AluBf
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public int Msb { get; }
+ public int Lsb { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluBf(inst, address, opCode);
+
+ public OpCode32AluBf(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 0) & 0xf;
+
+ Msb = (opCode >> 16) & 0x1f;
+ Lsb = (opCode >> 7) & 0x1f;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluImm.cs b/src/ARMeilleure/Decoders/OpCode32AluImm.cs
new file mode 100644
index 00000000..b5435aaf
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluImm.cs
@@ -0,0 +1,23 @@
+using ARMeilleure.Common;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluImm : OpCode32Alu, IOpCode32AluImm
+ {
+ public int Immediate { get; }
+
+ public bool IsRotated { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluImm(inst, address, opCode);
+
+ public OpCode32AluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int value = (opCode >> 0) & 0xff;
+ int shift = (opCode >> 8) & 0xf;
+
+ Immediate = BitUtils.RotateRight(value, shift * 2, 32);
+
+ IsRotated = shift != 0;
+ }
+ }
+} \ No newline at end of file
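
The decode above is the classic A32 modified-immediate scheme: an 8-bit value rotated right by twice the 4-bit rotate field. A standalone sketch with a local stand-in for BitUtils.RotateRight (assumed semantics, since Common/BitUtils.cs is not part of this diff):

    using System;

    static class AluImmDemo
    {
        // Rotate a value right within `size` bits (stand-in for BitUtils.RotateRight).
        static int RotateRight(int value, int shift, int size)
        {
            if (shift == 0) return value;
            return (int)(((uint)value >> shift) | ((uint)value << (size - shift)));
        }

        static void Main()
        {
            // Encoding 0x3FC (rotate = 3, imm8 = 0xFC): 0xFC ror 6 = 0xF0000003.
            int opCode = 0x3FC;
            int value = opCode & 0xff;
            int shift = (opCode >> 8) & 0xf;
            Console.WriteLine($"0x{(uint)RotateRight(value, shift * 2, 32):X8}"); // 0xF0000003
        }
    }
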
diff --git a/src/ARMeilleure/Decoders/OpCode32AluImm16.cs b/src/ARMeilleure/Decoders/OpCode32AluImm16.cs
new file mode 100644
index 00000000..e24edeb4
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluImm16.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluImm16 : OpCode32Alu, IOpCode32AluImm16
+ {
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluImm16(inst, address, opCode);
+
+ public OpCode32AluImm16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int imm12 = opCode & 0xfff;
+ int imm4 = (opCode >> 16) & 0xf;
+
+ Immediate = (imm4 << 12) | imm12;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluMla.cs b/src/ARMeilleure/Decoders/OpCode32AluMla.cs
new file mode 100644
index 00000000..2cd2b9dc
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluMla.cs
@@ -0,0 +1,30 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluMla : OpCode32, IOpCode32AluMla
+ {
+ public int Rn { get; }
+ public int Rm { get; }
+ public int Ra { get; }
+ public int Rd { get; }
+
+ public bool NHigh { get; }
+ public bool MHigh { get; }
+ public bool R { get; }
+ public bool? SetFlags { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluMla(inst, address, opCode);
+
+ public OpCode32AluMla(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rn = (opCode >> 0) & 0xf;
+ Rm = (opCode >> 8) & 0xf;
+ Ra = (opCode >> 12) & 0xf;
+ Rd = (opCode >> 16) & 0xf;
+ R = (opCode & (1 << 5)) != 0;
+
+ NHigh = ((opCode >> 5) & 0x1) == 1;
+ MHigh = ((opCode >> 6) & 0x1) == 1;
+ SetFlags = ((opCode >> 20) & 1) != 0;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluReg.cs b/src/ARMeilleure/Decoders/OpCode32AluReg.cs
new file mode 100644
index 00000000..493a977f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluReg.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluReg : OpCode32Alu, IOpCode32AluReg
+ {
+ public int Rm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluReg(inst, address, opCode);
+
+ public OpCode32AluReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluRsImm.cs b/src/ARMeilleure/Decoders/OpCode32AluRsImm.cs
new file mode 100644
index 00000000..c2dee6c9
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluRsImm.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluRsImm : OpCode32Alu, IOpCode32AluRsImm
+ {
+ public int Rm { get; }
+ public int Immediate { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluRsImm(inst, address, opCode);
+
+ public OpCode32AluRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Immediate = (opCode >> 7) & 0x1f;
+
+ ShiftType = (ShiftType)((opCode >> 5) & 3);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCode32AluRsReg.cs b/src/ARMeilleure/Decoders/OpCode32AluRsReg.cs
new file mode 100644
index 00000000..04740d08
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluRsReg.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluRsReg : OpCode32Alu, IOpCode32AluRsReg
+ {
+ public int Rm { get; }
+ public int Rs { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluRsReg(inst, address, opCode);
+
+ public OpCode32AluRsReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Rs = (opCode >> 8) & 0xf;
+
+ ShiftType = (ShiftType)((opCode >> 5) & 3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluUmull.cs b/src/ARMeilleure/Decoders/OpCode32AluUmull.cs
new file mode 100644
index 00000000..bf80df3f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluUmull.cs
@@ -0,0 +1,30 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluUmull : OpCode32, IOpCode32AluUmull
+ {
+ public int RdLo { get; }
+ public int RdHi { get; }
+ public int Rn { get; }
+ public int Rm { get; }
+
+ public bool NHigh { get; }
+ public bool MHigh { get; }
+
+ public bool? SetFlags { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluUmull(inst, address, opCode);
+
+ public OpCode32AluUmull(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ RdLo = (opCode >> 12) & 0xf;
+ RdHi = (opCode >> 16) & 0xf;
+ Rm = (opCode >> 8) & 0xf;
+ Rn = (opCode >> 0) & 0xf;
+
+ NHigh = ((opCode >> 5) & 0x1) == 1;
+ MHigh = ((opCode >> 6) & 0x1) == 1;
+
+ SetFlags = ((opCode >> 20) & 0x1) != 0;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluUx.cs b/src/ARMeilleure/Decoders/OpCode32AluUx.cs
new file mode 100644
index 00000000..57068675
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluUx.cs
@@ -0,0 +1,18 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluUx : OpCode32AluReg, IOpCode32AluUx
+ {
+ public int Rotate { get; }
+ public int RotateBits => Rotate * 8;
+ public bool Add => Rn != RegisterAlias.Aarch32Pc;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluUx(inst, address, opCode);
+
+ public OpCode32AluUx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rotate = (opCode >> 10) & 0x3;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32BImm.cs b/src/ARMeilleure/Decoders/OpCode32BImm.cs
new file mode 100644
index 00000000..f2959b33
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32BImm.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32BImm : OpCode32, IOpCode32BImm
+ {
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32BImm(inst, address, opCode);
+
+ public OpCode32BImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ uint pc = GetPc();
+
+ // When the condition is never, the instruction is BLX to Thumb mode.
+ if (Cond != Condition.Nv)
+ {
+ pc &= ~3u;
+ }
+
+ Immediate = pc + DecoderHelper.DecodeImm24_2(opCode);
+
+ if (Cond == Condition.Nv)
+ {
+ long H = (opCode >> 23) & 2;
+
+ Immediate |= H;
+ }
+ }
+ }
+} \ No newline at end of file
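
DecoderHelper.DecodeImm24_2 is defined elsewhere in this commit; assuming it sign-extends the low 24 bits of the opcode and scales by 4, the branch-target computation above can be reproduced standalone:

    using System;

    static class BranchImmDemo
    {
        // Stand-in for DecoderHelper.DecodeImm24_2 (assumed semantics): sign-extend
        // the low 24 bits, then scale by 4 since A32 branch offsets are word-aligned.
        static long DecodeImm24_2(int opCode) => ((opCode << 8) >> 8) * 4L;

        static void Main()
        {
            // "B ." at 0x1000: imm24 = -2, so target = (0x1000 + 8) + (-2 * 4) = 0x1000.
            int opCode = unchecked((int)0xEAFFFFFE);
            uint pc = 0x1000 + 8;
            long target = (pc & ~3u) + DecodeImm24_2(opCode);
            Console.WriteLine($"0x{target:X}"); // 0x1000
        }
    }
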
diff --git a/src/ARMeilleure/Decoders/OpCode32BReg.cs b/src/ARMeilleure/Decoders/OpCode32BReg.cs
new file mode 100644
index 00000000..d4f5f760
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32BReg.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32BReg : OpCode32, IOpCode32BReg
+ {
+ public int Rm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32BReg(inst, address, opCode);
+
+ public OpCode32BReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = opCode & 0xf;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCode32Exception.cs b/src/ARMeilleure/Decoders/OpCode32Exception.cs
new file mode 100644
index 00000000..b4edcc10
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Exception.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Exception : OpCode32, IOpCode32Exception
+ {
+ public int Id { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Exception(inst, address, opCode);
+
+ public OpCode32Exception(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Id = opCode & 0xFFFFFF;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32Mem.cs b/src/ARMeilleure/Decoders/OpCode32Mem.cs
new file mode 100644
index 00000000..ceb1e49f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Mem.cs
@@ -0,0 +1,39 @@
+using ARMeilleure.Instructions;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Mem : OpCode32, IOpCode32Mem
+ {
+ public int Rt { get; protected set; }
+ public int Rn { get; }
+
+ public int Immediate { get; protected set; }
+
+ public bool Index { get; }
+ public bool Add { get; }
+ public bool WBack { get; }
+ public bool Unprivileged { get; }
+
+ public bool IsLoad { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Mem(inst, address, opCode);
+
+ public OpCode32Mem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ bool isLoad = (opCode & (1 << 20)) != 0;
+ bool w = (opCode & (1 << 21)) != 0;
+ bool u = (opCode & (1 << 23)) != 0;
+ bool p = (opCode & (1 << 24)) != 0;
+
+ Index = p;
+ Add = u;
+ WBack = !p || w;
+ Unprivileged = !p && w;
+
+ IsLoad = isLoad || inst.Name == InstName.Ldrd;
+ }
+ }
+} \ No newline at end of file
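
The P/U/W bits above fold the three A32 addressing modes into four flags. A standalone sketch of the same decode (class and method names are illustrative):

    using System;

    static class MemAddressingDemo
    {
        // Decode the P/U/W bits the same way OpCode32Mem does and describe the result.
        static string Describe(int opCode)
        {
            bool w = (opCode & (1 << 21)) != 0;
            bool u = (opCode & (1 << 23)) != 0;
            bool p = (opCode & (1 << 24)) != 0;

            bool index = p;        // Offset is applied before the access.
            bool wback = !p || w;  // Post-indexed forms always write back.
            bool unpriv = !p && w; // P = 0, W = 1 selects the unprivileged LDRT/STRT forms.

            return $"index={index} add={u} wback={wback} unprivileged={unpriv}";
        }

        static void Main()
        {
            Console.WriteLine(Describe(1 << 24));               // Offset addressing.
            Console.WriteLine(Describe((1 << 24) | (1 << 21))); // Pre-indexed.
            Console.WriteLine(Describe(0));                     // Post-indexed.
        }
    }
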
diff --git a/src/ARMeilleure/Decoders/OpCode32MemImm.cs b/src/ARMeilleure/Decoders/OpCode32MemImm.cs
new file mode 100644
index 00000000..3af4b6f7
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemImm.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemImm : OpCode32Mem
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemImm(inst, address, opCode);
+
+ public OpCode32MemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Immediate = opCode & 0xfff;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCode32MemImm8.cs b/src/ARMeilleure/Decoders/OpCode32MemImm8.cs
new file mode 100644
index 00000000..1b8a57de
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemImm8.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemImm8 : OpCode32Mem
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemImm8(inst, address, opCode);
+
+ public OpCode32MemImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int imm4L = (opCode >> 0) & 0xf;
+ int imm4H = (opCode >> 8) & 0xf;
+
+ Immediate = imm4L | (imm4H << 4);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCode32MemLdEx.cs b/src/ARMeilleure/Decoders/OpCode32MemLdEx.cs
new file mode 100644
index 00000000..520113f4
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemLdEx.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemLdEx : OpCode32Mem, IOpCode32MemEx
+ {
+ public int Rd { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemLdEx(inst, address, opCode);
+
+ public OpCode32MemLdEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = opCode & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32MemMult.cs b/src/ARMeilleure/Decoders/OpCode32MemMult.cs
new file mode 100644
index 00000000..522b96bb
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemMult.cs
@@ -0,0 +1,52 @@
+using System.Numerics;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemMult : OpCode32, IOpCode32MemMult
+ {
+ public int Rn { get; }
+
+ public int RegisterMask { get; }
+ public int Offset { get; }
+ public int PostOffset { get; }
+
+ public bool IsLoad { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemMult(inst, address, opCode);
+
+ public OpCode32MemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rn = (opCode >> 16) & 0xf;
+
+ bool isLoad = (opCode & (1 << 20)) != 0;
+ bool w = (opCode & (1 << 21)) != 0;
+ bool u = (opCode & (1 << 23)) != 0;
+ bool p = (opCode & (1 << 24)) != 0;
+
+ RegisterMask = opCode & 0xffff;
+
+ int regsSize = BitOperations.PopCount((uint)RegisterMask) * 4;
+
+ if (!u)
+ {
+ Offset -= regsSize;
+ }
+
+ if (u == p)
+ {
+ Offset += 4;
+ }
+
+ if (w)
+ {
+ PostOffset = u ? regsSize : -regsSize;
+ }
+ else
+ {
+ PostOffset = 0;
+ }
+
+ IsLoad = isLoad;
+ }
+ }
+} \ No newline at end of file
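
The Offset/PostOffset math above collapses the four LDM/STM variants (increment/decrement, after/before) into a starting offset plus a writeback delta. A standalone sketch reproducing it (names illustrative):

    using System;
    using System.Numerics;

    static class MemMultDemo
    {
        // Reproduce OpCode32MemMult's offset math from the raw opcode word.
        static (int offset, int postOffset) Decode(int opCode)
        {
            bool w = (opCode & (1 << 21)) != 0;
            bool u = (opCode & (1 << 23)) != 0;
            bool p = (opCode & (1 << 24)) != 0;

            int regsSize = BitOperations.PopCount((uint)(opCode & 0xffff)) * 4;

            int offset = 0;
            if (!u) offset -= regsSize; // Decrement modes start below Rn.
            if (u == p) offset += 4;    // IB and DA shift the window by one word.

            return (offset, w ? (u ? regsSize : -regsSize) : 0);
        }

        static void Main()
        {
            // LDMIA r0!, {r1-r4}: U = 1, P = 0, W = 1, mask = 0b11110.
            Console.WriteLine(Decode((1 << 23) | (1 << 21) | 0b11110)); // (0, 16)

            // STMDB r0!, {r1-r4} (push-like): U = 0, P = 1, W = 1.
            Console.WriteLine(Decode((1 << 24) | (1 << 21) | 0b11110)); // (-16, -16)
        }
    }
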
diff --git a/src/ARMeilleure/Decoders/OpCode32MemReg.cs b/src/ARMeilleure/Decoders/OpCode32MemReg.cs
new file mode 100644
index 00000000..786f37fa
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemReg.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemReg : OpCode32Mem, IOpCode32MemReg
+ {
+ public int Rm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemReg(inst, address, opCode);
+
+ public OpCode32MemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32MemRsImm.cs b/src/ARMeilleure/Decoders/OpCode32MemRsImm.cs
new file mode 100644
index 00000000..e1284cf7
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemRsImm.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemRsImm : OpCode32Mem, IOpCode32MemRsImm
+ {
+ public int Rm { get; }
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemRsImm(inst, address, opCode);
+
+ public OpCode32MemRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Immediate = (opCode >> 7) & 0x1f;
+
+ ShiftType = (ShiftType)((opCode >> 5) & 3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32MemStEx.cs b/src/ARMeilleure/Decoders/OpCode32MemStEx.cs
new file mode 100644
index 00000000..dcf93b22
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemStEx.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemStEx : OpCode32Mem, IOpCode32MemEx
+ {
+ public int Rd { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemStEx(inst, address, opCode);
+
+ public OpCode32MemStEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 12) & 0xf;
+ Rt = (opCode >> 0) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32Mrs.cs b/src/ARMeilleure/Decoders/OpCode32Mrs.cs
new file mode 100644
index 00000000..c34a8b99
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Mrs.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Mrs : OpCode32
+ {
+ public bool R { get; }
+ public int Rd { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Mrs(inst, address, opCode);
+
+ public OpCode32Mrs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ R = ((opCode >> 22) & 1) != 0;
+ Rd = (opCode >> 12) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32MsrReg.cs b/src/ARMeilleure/Decoders/OpCode32MsrReg.cs
new file mode 100644
index 00000000..d897ffd8
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MsrReg.cs
@@ -0,0 +1,29 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MsrReg : OpCode32
+ {
+ public bool R { get; }
+ public int Mask { get; }
+ public int Rd { get; }
+ public bool Banked { get; }
+ public int Rn { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MsrReg(inst, address, opCode);
+
+ public OpCode32MsrReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ R = ((opCode >> 22) & 1) != 0;
+ Mask = (opCode >> 16) & 0xf;
+ Rd = (opCode >> 12) & 0xf;
+ Banked = ((opCode >> 9) & 1) != 0;
+ Rn = (opCode >> 0) & 0xf;
+
+ if (Rn == RegisterAlias.Aarch32Pc || Mask == 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32Sat.cs b/src/ARMeilleure/Decoders/OpCode32Sat.cs
new file mode 100644
index 00000000..621def27
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Sat.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Sat : OpCode32
+ {
+ public int Rn { get; }
+ public int Imm5 { get; }
+ public int Rd { get; }
+ public int SatImm { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Sat(inst, address, opCode);
+
+ public OpCode32Sat(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rn = (opCode >> 0) & 0xf;
+ Imm5 = (opCode >> 7) & 0x1f;
+ Rd = (opCode >> 12) & 0xf;
+ SatImm = (opCode >> 16) & 0x1f;
+
+ ShiftType = (ShiftType)((opCode >> 5) & 2);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCode32Sat16.cs b/src/ARMeilleure/Decoders/OpCode32Sat16.cs
new file mode 100644
index 00000000..51061b07
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Sat16.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Sat16 : OpCode32
+ {
+ public int Rn { get; }
+ public int Rd { get; }
+ public int SatImm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Sat16(inst, address, opCode);
+
+ public OpCode32Sat16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rn = (opCode >> 0) & 0xf;
+ Rd = (opCode >> 12) & 0xf;
+ SatImm = (opCode >> 16) & 0xf;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCode32Simd.cs b/src/ARMeilleure/Decoders/OpCode32Simd.cs
new file mode 100644
index 00000000..636aa0a8
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Simd.cs
@@ -0,0 +1,33 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Simd : OpCode32SimdBase
+ {
+ public int Opc { get; protected set; }
+ public bool Q { get; protected set; }
+ public bool F { get; protected set; }
+ public bool U { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Simd(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32Simd(inst, address, opCode, true);
+
+ public OpCode32Simd(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Size = (opCode >> 20) & 0x3;
+ Q = ((opCode >> 6) & 0x1) != 0;
+ F = ((opCode >> 10) & 0x1) != 0;
+ U = ((opCode >> (isThumb ? 28 : 24)) & 0x1) != 0;
+ Opc = (opCode >> 7) & 0x3;
+
+ RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
+
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+
+ // Subclasses have their own handling of the Vx registers, so only validate here when constructing this exact type.
+ if (GetType() == typeof(OpCode32Simd) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdBase.cs b/src/ARMeilleure/Decoders/OpCode32SimdBase.cs
new file mode 100644
index 00000000..4382fc2a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdBase.cs
@@ -0,0 +1,55 @@
+using System;
+
+namespace ARMeilleure.Decoders
+{
+ abstract class OpCode32SimdBase : OpCode32, IOpCode32Simd
+ {
+ public int Vd { get; protected set; }
+ public int Vm { get; protected set; }
+ public int Size { get; protected set; }
+
+ // Helpers to index doublewords within quadwords. Essentially, looping over the vector starts at quadword Qx
+ // and at index Fx or Ix within it, depending on the instruction type.
+ //
+ // Qx: The quadword register that the target vector is contained in.
+ // Ix: The starting index of the target vector within the quadword, with size treated as integer.
+ // Fx: The starting index of the target vector within the quadword, with size treated as floating point. (16 or 32)
+ public int Qd => GetQuadwordIndex(Vd);
+ public int Id => GetQuadwordSubindex(Vd) << (3 - Size);
+ public int Fd => GetQuadwordSubindex(Vd) << (1 - (Size & 1)); // (Size & 1) truncates the top bit, so fp16 (Size 1) aliases fp64 (Size 3); fp16 is an optional ARMv8.2 extension, and we always assume fp64 here.
+
+ public int Qm => GetQuadwordIndex(Vm);
+ public int Im => GetQuadwordSubindex(Vm) << (3 - Size);
+ public int Fm => GetQuadwordSubindex(Vm) << (1 - (Size & 1));
+
+ protected int GetQuadwordIndex(int index)
+ {
+ switch (RegisterSize)
+ {
+ case RegisterSize.Simd128:
+ case RegisterSize.Simd64:
+ return index >> 1;
+ }
+
+ throw new InvalidOperationException();
+ }
+
+ protected int GetQuadwordSubindex(int index)
+ {
+ switch (RegisterSize)
+ {
+ case RegisterSize.Simd128:
+ return 0;
+ case RegisterSize.Simd64:
+ return index & 1;
+ }
+
+ throw new InvalidOperationException();
+ }
+
+ protected OpCode32SimdBase(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+ }
+ }
+}
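
The quadword indexing above can be exercised on its own. A standalone sketch of Qd/Id for the 64-bit register case (names illustrative):

    using System;

    static class QuadwordIndexDemo
    {
        // A doubleword register Vd lives in quadword Vd >> 1; its first element index
        // within that quadword is (Vd & 1) scaled by the elements per doubleword.
        static (int qd, int id) Locate(int vd, int size)
        {
            int qd = vd >> 1;                // Qd: containing quadword register.
            int id = (vd & 1) << (3 - size); // Id: starting element index, integer sizes.
            return (qd, id);
        }

        static void Main()
        {
            Console.WriteLine(Locate(vd: 5, size: 1)); // (2, 4): D5 is halfwords 4..7 of Q2.
            Console.WriteLine(Locate(vd: 5, size: 3)); // (2, 1): D5 is doubleword 1 of Q2.
        }
    }
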
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdBinary.cs b/src/ARMeilleure/Decoders/OpCode32SimdBinary.cs
new file mode 100644
index 00000000..ba190de9
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdBinary.cs
@@ -0,0 +1,21 @@
+namespace ARMeilleure.Decoders
+{
+ /// <summary>
+ /// A special alias that always runs with 64-bit integer elements, to speed up binary ops a little.
+ /// </summary>
+ class OpCode32SimdBinary : OpCode32SimdReg
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdBinary(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdBinary(inst, address, opCode, true);
+
+ public OpCode32SimdBinary(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Size = 3;
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs b/src/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs
new file mode 100644
index 00000000..445e6781
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdCmpZ : OpCode32Simd
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCmpZ(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCmpZ(inst, address, opCode, true);
+
+ public OpCode32SimdCmpZ(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Size = (opCode >> 18) & 0x3;
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs b/src/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs
new file mode 100644
index 00000000..41cf4d88
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdCvtFI : OpCode32SimdS
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtFI(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtFI(inst, address, opCode, true);
+
+ public OpCode32SimdCvtFI(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Opc = (opCode >> 7) & 0x1;
+
+ bool toInteger = (Opc2 & 0b100) != 0;
+
+ if (toInteger)
+ {
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs b/src/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs
new file mode 100644
index 00000000..a95b32ab
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs
@@ -0,0 +1,44 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdCvtTB : OpCode32, IOpCode32Simd
+ {
+ public int Vd { get; }
+ public int Vm { get; }
+ public bool Op { get; } // Convert to Half / Convert from Half
+ public bool T { get; } // Top / Bottom
+ public int Size { get; } // Double / Single
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtTB(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtTB(inst, address, opCode, true);
+
+ public OpCode32SimdCvtTB(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Op = ((opCode >> 16) & 0x1) != 0;
+ T = ((opCode >> 7) & 0x1) != 0;
+ Size = ((opCode >> 8) & 0x1);
+
+ RegisterSize = Size == 1 ? RegisterSize.Int64 : RegisterSize.Int32;
+
+ if (Size == 1)
+ {
+ if (Op)
+ {
+ Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ }
+ }
+ else
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdDupElem.cs b/src/ARMeilleure/Decoders/OpCode32SimdDupElem.cs
new file mode 100644
index 00000000..c455b5b4
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdDupElem.cs
@@ -0,0 +1,43 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdDupElem : OpCode32Simd
+ {
+ public int Index { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdDupElem(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdDupElem(inst, address, opCode, true);
+
+ public OpCode32SimdDupElem(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ var opc = (opCode >> 16) & 0xf;
+
+ if ((opc & 0b1) == 1)
+ {
+ Size = 0;
+ Index = (opc >> 1) & 0x7;
+ }
+ else if ((opc & 0b11) == 0b10)
+ {
+ Size = 1;
+ Index = (opc >> 2) & 0x3;
+ }
+ else if ((opc & 0b111) == 0b100)
+ {
+ Size = 2;
+ Index = (opc >> 3) & 0x1;
+ }
+ else
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdDupGP.cs b/src/ARMeilleure/Decoders/OpCode32SimdDupGP.cs
new file mode 100644
index 00000000..31546ea3
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdDupGP.cs
@@ -0,0 +1,36 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdDupGP : OpCode32, IOpCode32Simd
+ {
+ public int Size { get; }
+ public int Vd { get; }
+ public int Rt { get; }
+ public bool Q { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdDupGP(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdDupGP(inst, address, opCode, true);
+
+ public OpCode32SimdDupGP(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Size = 2 - (((opCode >> 21) & 0x2) | ((opCode >> 5) & 0x1)); // B:E selects the element size - 0 for 32-bit, then 16, then 8.
+ if (Size == -1)
+ {
+ Instruction = InstDescriptor.Undefined;
+ return;
+ }
+ Q = ((opCode >> 21) & 0x1) != 0;
+
+ RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
+
+ Vd = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf);
+ Rt = ((opCode >> 12) & 0xf);
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
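
The B:E decode above maps two bits to 32-, 16- and 8-bit elements, with the remaining combination undefined. A standalone sketch (names illustrative):

    using System;

    static class DupSizeDemo
    {
        // VDUP (from general-purpose register): B (bit 22) and E (bit 5) pick the size.
        static int DecodeSize(int opCode)
        {
            int be = ((opCode >> 21) & 0x2) | ((opCode >> 5) & 0x1);
            return 2 - be; // 2 = 32-bit, 1 = 16-bit, 0 = 8-bit, -1 = undefined.
        }

        static void Main()
        {
            Console.WriteLine(DecodeSize(0));                    // 2 (32-bit elements)
            Console.WriteLine(DecodeSize(1 << 5));               // 1 (16-bit)
            Console.WriteLine(DecodeSize(1 << 22));              // 0 (8-bit)
            Console.WriteLine(DecodeSize((1 << 22) | (1 << 5))); // -1 (undefined)
        }
    }
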
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdExt.cs b/src/ARMeilleure/Decoders/OpCode32SimdExt.cs
new file mode 100644
index 00000000..6dbb5b66
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdExt.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdExt : OpCode32SimdReg
+ {
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdExt(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdExt(inst, address, opCode, true);
+
+ public OpCode32SimdExt(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Immediate = (opCode >> 8) & 0xf;
+ Size = 0;
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn) || (!Q && Immediate > 7))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdImm.cs b/src/ARMeilleure/Decoders/OpCode32SimdImm.cs
new file mode 100644
index 00000000..bf0ca527
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdImm.cs
@@ -0,0 +1,38 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdImm : OpCode32SimdBase, IOpCode32SimdImm
+ {
+ public bool Q { get; }
+ public long Immediate { get; }
+ public int Elems => GetBytesCount() >> Size;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdImm(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdImm(inst, address, opCode, true);
+
+ public OpCode32SimdImm(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Vd = (opCode >> 12) & 0xf;
+ Vd |= (opCode >> 18) & 0x10;
+
+ Q = ((opCode >> 6) & 0x1) > 0;
+
+ int cMode = (opCode >> 8) & 0xf;
+ int op = (opCode >> 5) & 0x1;
+
+ long imm;
+
+ imm = ((uint)opCode >> 0) & 0xf;
+ imm |= ((uint)opCode >> 12) & 0x70;
+ imm |= ((uint)opCode >> (isThumb ? 21 : 17)) & 0x80;
+
+ (Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm);
+
+ RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
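
OpCodeSimdHelper.GetSimdImmediateAndSize is not part of this diff; assuming it follows the architectural AdvSIMDExpandImm pseudocode per element, a partial sketch covering only the simpler (cMode, op) cases:

    using System;

    static class SimdImmDemo
    {
        // Per-element expansion for a subset of cases; the fill-with-ones, per-bit
        // and floating-point cases of AdvSIMDExpandImm are omitted from this sketch.
        static (long elem, int size) Expand(int cMode, int op, long imm8)
        {
            if ((cMode & 0b1000) == 0)      // cmode 0xxx: 32-bit shifted immediate.
            {
                return (imm8 << (((cMode >> 1) & 3) * 8), 2);
            }
            if ((cMode & 0b1100) == 0b1000) // cmode 10xx: 16-bit shifted immediate.
            {
                return (imm8 << (((cMode >> 1) & 1) * 8), 1);
            }
            if (cMode == 0b1110 && op == 0) // Plain byte immediate.
            {
                return (imm8, 0);
            }
            throw new NotSupportedException("Case omitted from this sketch.");
        }

        static void Main()
        {
            Console.WriteLine(Expand(0b0100, 0, 0xAB)); // (11206656, 2): 0xAB << 16.
            Console.WriteLine(Expand(0b1010, 0, 0xAB)); // (43776, 1): 0xAB << 8.
        }
    }
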
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdImm44.cs b/src/ARMeilleure/Decoders/OpCode32SimdImm44.cs
new file mode 100644
index 00000000..fa00a935
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdImm44.cs
@@ -0,0 +1,41 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdImm44 : OpCode32, IOpCode32SimdImm
+ {
+ public int Vd { get; }
+ public long Immediate { get; }
+ public int Size { get; }
+ public int Elems { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdImm44(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdImm44(inst, address, opCode, true);
+
+ public OpCode32SimdImm44(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Size = (opCode >> 8) & 0x3;
+
+ bool single = Size != 3;
+
+ if (single)
+ {
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ }
+
+ long imm;
+
+ imm = ((uint)opCode >> 0) & 0xf;
+ imm |= ((uint)opCode >> 12) & 0xf0;
+
+ Immediate = (Size == 3) ? (long)DecoderHelper.Imm8ToFP64Table[(int)imm] : DecoderHelper.Imm8ToFP32Table[(int)imm];
+
+ RegisterSize = (!single) ? RegisterSize.Int64 : RegisterSize.Int32;
+ Elems = 1;
+ }
+ }
+}
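
The Imm8ToFP32Table/Imm8ToFP64Table lookups come from DecoderHelper, which is not part of this diff; assuming they precompute the architectural VFPExpandImm transform, a from-scratch sketch of the 32-bit case:

    using System;

    static class VfpImmDemo
    {
        // VFPExpandImm, 32-bit case: imm8 = abcdefgh becomes sign a,
        // exponent NOT(b):bbbbb:cd, fraction efgh followed by 19 zero bits.
        static uint VfpExpandImm32(byte imm8)
        {
            uint sign = (uint)(imm8 >> 7) & 1;
            uint b = (uint)(imm8 >> 6) & 1;
            uint exp = ((b ^ 1) << 7) | ((b * 0b11111u) << 2) | ((uint)(imm8 >> 4) & 3);
            uint frac = ((uint)imm8 & 0xf) << 19;
            return (sign << 31) | (exp << 23) | frac;
        }

        static void Main()
        {
            // VMOV.F32 s0, #1.0 encodes imm8 = 0x70.
            Console.WriteLine(BitConverter.Int32BitsToSingle((int)VfpExpandImm32(0x70))); // 1
            Console.WriteLine(BitConverter.Int32BitsToSingle((int)VfpExpandImm32(0xF8))); // -1.5
        }
    }
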
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdLong.cs b/src/ARMeilleure/Decoders/OpCode32SimdLong.cs
new file mode 100644
index 00000000..8d64d673
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdLong.cs
@@ -0,0 +1,30 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdLong : OpCode32SimdBase
+ {
+ public bool U { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdLong(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdLong(inst, address, opCode, true);
+
+ public OpCode32SimdLong(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ int imm3h = (opCode >> 19) & 0x7;
+
+ // The value must be a power of 2, otherwise it is the encoding of another instruction.
+ switch (imm3h)
+ {
+ case 1: Size = 0; break;
+ case 2: Size = 1; break;
+ case 4: Size = 2; break;
+ }
+
+ U = ((opCode >> (isThumb ? 28 : 24)) & 0x1) != 0;
+
+ RegisterSize = RegisterSize.Simd64;
+
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMemImm.cs b/src/ARMeilleure/Decoders/OpCode32SimdMemImm.cs
new file mode 100644
index 00000000..c933a5ad
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMemImm.cs
@@ -0,0 +1,40 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMemImm : OpCode32, IOpCode32Simd
+ {
+ public int Vd { get; }
+ public int Rn { get; }
+ public int Size { get; }
+ public bool Add { get; }
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemImm(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemImm(inst, address, opCode, true);
+
+ public OpCode32SimdMemImm(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Immediate = opCode & 0xff;
+
+ Rn = (opCode >> 16) & 0xf;
+ Size = (opCode >> 8) & 0x3;
+
+ Immediate <<= (Size == 1) ? 1 : 2;
+
+ bool u = (opCode & (1 << 23)) != 0;
+ Add = u;
+
+ bool single = Size != 3;
+
+ if (single)
+ {
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMemMult.cs b/src/ARMeilleure/Decoders/OpCode32SimdMemMult.cs
new file mode 100644
index 00000000..a16a03d3
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMemMult.cs
@@ -0,0 +1,76 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMemMult : OpCode32
+ {
+ public int Rn { get; }
+ public int Vd { get; }
+
+ public int RegisterRange { get; }
+ public int Offset { get; }
+ public int PostOffset { get; }
+ public bool IsLoad { get; }
+ public bool DoubleWidth { get; }
+ public bool Add { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemMult(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemMult(inst, address, opCode, true);
+
+ public OpCode32SimdMemMult(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Rn = (opCode >> 16) & 0xf;
+
+ bool isLoad = (opCode & (1 << 20)) != 0;
+ bool w = (opCode & (1 << 21)) != 0;
+ bool u = (opCode & (1 << 23)) != 0;
+ bool p = (opCode & (1 << 24)) != 0;
+
+ if (p == u && w)
+ {
+ Instruction = InstDescriptor.Undefined;
+ return;
+ }
+
+ DoubleWidth = (opCode & (1 << 8)) != 0;
+
+ if (!DoubleWidth)
+ {
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ }
+
+ Add = u;
+
+ RegisterRange = opCode & 0xff;
+
+ int regsSize = RegisterRange * 4; // The range is measured in single-register (32-bit) units even in double mode.
+
+ if (!u)
+ {
+ Offset -= regsSize;
+ }
+
+ if (w)
+ {
+ PostOffset = u ? regsSize : -regsSize;
+ }
+ else
+ {
+ PostOffset = 0;
+ }
+
+ IsLoad = isLoad;
+
+ int regs = DoubleWidth ? RegisterRange / 2 : RegisterRange;
+
+ if (RegisterRange == 0 || RegisterRange > 32 || Vd + regs > 32)
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMemPair.cs b/src/ARMeilleure/Decoders/OpCode32SimdMemPair.cs
new file mode 100644
index 00000000..da88eed2
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMemPair.cs
@@ -0,0 +1,50 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMemPair : OpCode32, IOpCode32Simd
+ {
+ private static readonly int[] _regsMap =
+ {
+ 1, 1, 4, 2,
+ 1, 1, 3, 1,
+ 1, 1, 2, 1,
+ 1, 1, 1, 1
+ };
+
+ public int Vd { get; }
+ public int Rn { get; }
+ public int Rm { get; }
+ public int Align { get; }
+ public bool WBack { get; }
+ public bool RegisterIndex { get; }
+ public int Size { get; }
+ public int Elems => 8 >> Size;
+ public int Regs { get; }
+ public int Increment { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemPair(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemPair(inst, address, opCode, true);
+
+ public OpCode32SimdMemPair(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Vd = (opCode >> 12) & 0xf;
+ Vd |= (opCode >> 18) & 0x10;
+
+ Size = (opCode >> 6) & 0x3;
+
+ Align = (opCode >> 4) & 0x3;
+ Rm = (opCode >> 0) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ WBack = Rm != RegisterAlias.Aarch32Pc;
+ RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp;
+
+ Regs = _regsMap[(opCode >> 8) & 0xf];
+
+ Increment = ((opCode >> 8) & 0x1) + 1;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs b/src/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs
new file mode 100644
index 00000000..35dd41c2
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs
@@ -0,0 +1,51 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMemSingle : OpCode32, IOpCode32Simd
+ {
+ public int Vd { get; }
+ public int Rn { get; }
+ public int Rm { get; }
+ public int IndexAlign { get; }
+ public int Index { get; }
+ public bool WBack { get; }
+ public bool RegisterIndex { get; }
+ public int Size { get; }
+ public bool Replicate { get; }
+ public int Increment { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemSingle(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemSingle(inst, address, opCode, true);
+
+ public OpCode32SimdMemSingle(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Vd = (opCode >> 12) & 0xf;
+ Vd |= (opCode >> 18) & 0x10;
+
+ IndexAlign = (opCode >> 4) & 0xf;
+
+ Size = (opCode >> 10) & 0x3;
+ Replicate = Size == 3;
+ if (Replicate)
+ {
+ Size = (opCode >> 6) & 0x3;
+ Increment = ((opCode >> 5) & 1) + 1;
+ Index = 0;
+ }
+ else
+ {
+ Increment = (((IndexAlign >> Size) & 1) == 0) ? 1 : 2;
+ Index = IndexAlign >> (1 + Size);
+ }
+
+ Rm = (opCode >> 0) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ WBack = Rm != RegisterAlias.Aarch32Pc;
+ RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp;
+ }
+ }
+}
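
For the non-replicate forms above, index_align packs both the element index and the register increment. A standalone sketch of that unpacking (names illustrative):

    using System;

    static class MemSingleDemo
    {
        // Mirror OpCode32SimdMemSingle: bit `size` of index_align selects the register
        // increment, and the bits above it give the element index.
        static (int index, int increment) Decode(int size, int indexAlign)
        {
            int increment = ((indexAlign >> size) & 1) == 0 ? 1 : 2;
            int index = indexAlign >> (1 + size);
            return (index, increment);
        }

        static void Main()
        {
            Console.WriteLine(Decode(size: 0, indexAlign: 0b1010)); // (5, 1): byte element 5.
            Console.WriteLine(Decode(size: 1, indexAlign: 0b0110)); // (1, 2): halfword element 1, step 2.
        }
    }
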
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMovGp.cs b/src/ARMeilleure/Decoders/OpCode32SimdMovGp.cs
new file mode 100644
index 00000000..5afd3488
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMovGp.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMovGp : OpCode32, IOpCode32Simd
+ {
+ public int Size => 2;
+
+ public int Vn { get; }
+ public int Rt { get; }
+ public int Op { get; }
+
+ public int Opc1 { get; }
+ public int Opc2 { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGp(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGp(inst, address, opCode, true);
+
+ public OpCode32SimdMovGp(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ // Which one is used is instruction dependent.
+ Op = (opCode >> 20) & 0x1;
+
+ Opc1 = (opCode >> 21) & 0x3;
+ Opc2 = (opCode >> 5) & 0x3;
+
+ Vn = ((opCode >> 7) & 0x1) | ((opCode >> 15) & 0x1e);
+ Rt = (opCode >> 12) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs b/src/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs
new file mode 100644
index 00000000..2d693119
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs
@@ -0,0 +1,36 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMovGpDouble : OpCode32, IOpCode32Simd
+ {
+ public int Size => 3;
+
+ public int Vm { get; }
+ public int Rt { get; }
+ public int Rt2 { get; }
+ public int Op { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGpDouble(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGpDouble(inst, address, opCode, true);
+
+ public OpCode32SimdMovGpDouble(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ // Which one is used is instruction dependent.
+ Op = (opCode >> 20) & 0x1;
+
+ Rt = (opCode >> 12) & 0xf;
+ Rt2 = (opCode >> 16) & 0xf;
+
+ bool single = (opCode & (1 << 8)) == 0;
+ if (single)
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ }
+ else
+ {
+ Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs b/src/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs
new file mode 100644
index 00000000..7816665f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs
@@ -0,0 +1,51 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMovGpElem : OpCode32, IOpCode32Simd
+ {
+ public int Size { get; }
+
+ public int Vd { get; }
+ public int Rt { get; }
+ public int Op { get; }
+ public bool U { get; }
+
+ public int Index { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGpElem(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGpElem(inst, address, opCode, true);
+
+ public OpCode32SimdMovGpElem(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Op = (opCode >> 20) & 0x1;
+ U = ((opCode >> 23) & 1) != 0;
+
+ var opc = (((opCode >> 23) & 1) << 4) | (((opCode >> 21) & 0x3) << 2) | ((opCode >> 5) & 0x3);
+
+ if ((opc & 0b01000) == 0b01000)
+ {
+ Size = 0;
+ Index = opc & 0x7;
+ }
+ else if ((opc & 0b01001) == 0b00001)
+ {
+ Size = 1;
+ Index = (opc >> 1) & 0x3;
+ }
+ else if ((opc & 0b11011) == 0)
+ {
+ Size = 2;
+ Index = (opc >> 2) & 0x1;
+ }
+ else
+ {
+ Instruction = InstDescriptor.Undefined;
+ return;
+ }
+
+ Vd = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf);
+ Rt = (opCode >> 12) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMovn.cs b/src/ARMeilleure/Decoders/OpCode32SimdMovn.cs
new file mode 100644
index 00000000..576e12cc
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMovn.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMovn : OpCode32Simd
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovn(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovn(inst, address, opCode, true);
+
+ public OpCode32SimdMovn(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Size = (opCode >> 18) & 0x3;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdReg.cs b/src/ARMeilleure/Decoders/OpCode32SimdReg.cs
new file mode 100644
index 00000000..1c46b0e0
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdReg.cs
@@ -0,0 +1,25 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdReg : OpCode32Simd
+ {
+ public int Vn { get; }
+
+ public int Qn => GetQuadwordIndex(Vn);
+ public int In => GetQuadwordSubindex(Vn) << (3 - Size);
+ public int Fn => GetQuadwordSubindex(Vn) << (1 - (Size & 1));
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdReg(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdReg(inst, address, opCode, true);
+
+ public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf);
+
+ // Subclasses have their own handling of the Vx registers, so only validate here when constructing this exact type.
+ if (GetType() == typeof(OpCode32SimdReg) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegElem.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegElem.cs
new file mode 100644
index 00000000..173c5265
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdRegElem.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdRegElem : OpCode32SimdReg
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegElem(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegElem(inst, address, opCode, true);
+
+ public OpCode32SimdRegElem(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Q = ((opCode >> (isThumb ? 28 : 24)) & 0x1) != 0;
+ F = ((opCode >> 8) & 0x1) != 0;
+ Size = (opCode >> 20) & 0x3;
+
+ RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
+
+ if (Size == 1)
+ {
+ Vm = ((opCode >> 3) & 0x1) | ((opCode >> 4) & 0x2) | ((opCode << 2) & 0x1c);
+ }
+ else /* if (Size == 2) */
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ }
+
+ if ((GetType() == typeof(OpCode32SimdRegElem) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vn)) || Size == 0 || (Size == 1 && F))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs
new file mode 100644
index 00000000..b87ac413
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdRegElemLong : OpCode32SimdRegElem
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegElemLong(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegElemLong(inst, address, opCode, true);
+
+ public OpCode32SimdRegElemLong(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Q = false;
+ F = false;
+
+ RegisterSize = RegisterSize.Simd64;
+
+ // (Vd & 1) != 0 and Size == 3 are also invalid, but those cases are filtered out at the encoding level.
+ if (Size == 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegLong.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegLong.cs
new file mode 100644
index 00000000..11069383
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdRegLong.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdRegLong : OpCode32SimdReg
+ {
+ public bool Polynomial { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegLong(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegLong(inst, address, opCode, true);
+
+ public OpCode32SimdRegLong(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Q = false;
+ RegisterSize = RegisterSize.Simd64;
+
+ Polynomial = ((opCode >> 9) & 0x1) != 0;
+
+ // Subclasses have their own handling of the Vx registers, so only validate here when constructing this exact type.
+ if (GetType() == typeof(OpCode32SimdRegLong) && DecoderHelper.VectorArgumentsInvalid(true, Vd))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegS.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegS.cs
new file mode 100644
index 00000000..8168e83f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdRegS.cs
@@ -0,0 +1,23 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdRegS : OpCode32SimdS
+ {
+ public int Vn { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegS(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegS(inst, address, opCode, true);
+
+ public OpCode32SimdRegS(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ bool single = Size != 3;
+ if (single)
+ {
+ Vn = ((opCode >> 7) & 0x1) | ((opCode >> 15) & 0x1e);
+ }
+ else
+ {
+ Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegWide.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegWide.cs
new file mode 100644
index 00000000..fd2b3bf1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdRegWide.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdRegWide : OpCode32SimdReg
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegWide(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegWide(inst, address, opCode, true);
+
+ public OpCode32SimdRegWide(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Q = false;
+ RegisterSize = RegisterSize.Simd64;
+
+ // Subclasses have their own handling of the Vx registers, so only validate here when constructing this exact type.
+ if (GetType() == typeof(OpCode32SimdRegWide) && DecoderHelper.VectorArgumentsInvalid(true, Vd, Vn))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRev.cs b/src/ARMeilleure/Decoders/OpCode32SimdRev.cs
new file mode 100644
index 00000000..cb64765f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdRev.cs
@@ -0,0 +1,23 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdRev : OpCode32SimdCmpZ
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRev(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRev(inst, address, opCode, true);
+
+ public OpCode32SimdRev(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ if (Opc + Size >= 3)
+ {
+ Instruction = InstDescriptor.Undefined;
+ return;
+ }
+
+ // Currently, this instruction is treated as though its opcode field were the true size,
+ // which lets us reverse vectors one element at a time (e.g. apply math magic to an I64 rather than insert lots of I8s).
+ int tempSize = Size;
+ Size = 3 - Opc; // Op 0 is 64 bit, 1 is 32 and so on.
+ Opc = tempSize;
+ }
+ }
+}
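Aside: the swap above leaves Size holding the container width and Opc holding the original element width. A minimal sketch of what the swapped fields mean for one concrete VREV encoding (variable names are illustrative, not part of the sources):

// VREV64.16 reverses 16-bit elements within each 64-bit container;
// its encoding carries op = 0 and size = 1.
int op = 0, size = 1;

int containerSizeLog2 = 3 - op; // 3 => 64-bit containers ("Op 0 is 64 bit" above)
int elementSizeLog2 = size;     // 1 => 16-bit elements

int elemsReversedAsUnit = 1 << (containerSizeLog2 - elementSizeLog2); // 4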
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdS.cs b/src/ARMeilleure/Decoders/OpCode32SimdS.cs
new file mode 100644
index 00000000..63c03c01
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdS.cs
@@ -0,0 +1,39 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdS : OpCode32, IOpCode32Simd
+ {
+ public int Vd { get; protected set; }
+ public int Vm { get; protected set; }
+ public int Opc { get; protected set; } // "with_zero" (Opc<1>) [Vcmp, Vcmpe].
+ public int Opc2 { get; } // opc2 or RM (opc2<1:0>) [Vcvt, Vrint].
+ public int Size { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdS(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdS(inst, address, opCode, true);
+
+ public OpCode32SimdS(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Opc = (opCode >> 15) & 0x3;
+ Opc2 = (opCode >> 16) & 0x7;
+
+ Size = (opCode >> 8) & 0x3;
+
+ bool single = Size != 3;
+
+ RegisterSize = single ? RegisterSize.Int32 : RegisterSize.Int64;
+
+ if (single)
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ }
+ }
+ }
+}
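Aside: the single/double split above follows the AArch32 VFP register numbering: S registers are indexed by Vd:D (the D bit is the low bit), D registers by D:Vd (the D bit is the high bit). A minimal sketch of the packing (helper names are assumptions for illustration):

static int PackSReg(int vd4, int dBit) => (vd4 << 1) | dBit; // S0..S31
static int PackDReg(int vd4, int dBit) => (dBit << 4) | vd4; // D0..D31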
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdSel.cs b/src/ARMeilleure/Decoders/OpCode32SimdSel.cs
new file mode 100644
index 00000000..37fd714a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdSel.cs
@@ -0,0 +1,23 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdSel : OpCode32SimdRegS
+ {
+ public OpCode32SimdSelMode Cc { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSel(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSel(inst, address, opCode, true);
+
+ public OpCode32SimdSel(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Cc = (OpCode32SimdSelMode)((opCode >> 20) & 3);
+ }
+ }
+
+ enum OpCode32SimdSelMode : int
+ {
+ Eq = 0,
+ Vs,
+ Ge,
+ Gt
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdShImm.cs b/src/ARMeilleure/Decoders/OpCode32SimdShImm.cs
new file mode 100644
index 00000000..55ddc395
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdShImm.cs
@@ -0,0 +1,46 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdShImm : OpCode32Simd
+ {
+ public int Shift { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImm(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImm(inst, address, opCode, true);
+
+ public OpCode32SimdShImm(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ int imm6 = (opCode >> 16) & 0x3f;
+ int limm6 = ((opCode >> 1) & 0x40) | imm6;
+
+ if ((limm6 & 0x40) == 0b1000000)
+ {
+ Size = 3;
+ Shift = imm6;
+ }
+ else if ((limm6 & 0x60) == 0b0100000)
+ {
+ Size = 2;
+ Shift = imm6 - 32;
+ }
+ else if ((limm6 & 0x70) == 0b0010000)
+ {
+ Size = 1;
+ Shift = imm6 - 16;
+ }
+ else if ((limm6 & 0x78) == 0b0001000)
+ {
+ Size = 0;
+ Shift = imm6 - 8;
+ }
+ else
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+
+ if (GetType() == typeof(OpCode32SimdShImm) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
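Aside: in the decode above, the leading set bit of limm6 = L:imm6 selects the element size, and subtracting the element width from imm6 leaves the stored shift value. A standalone sketch of the same cascade (the helper is illustrative):

static (int Size, int Shift) DecodeShImm(int limm6)
{
    int imm6 = limm6 & 0x3f;

    if ((limm6 & 0b1000000) != 0) return (3, imm6);      // 64-bit elements
    if ((limm6 & 0b0100000) != 0) return (2, imm6 - 32); // 32-bit elements
    if ((limm6 & 0b0010000) != 0) return (1, imm6 - 16); // 16-bit elements
    if ((limm6 & 0b0001000) != 0) return (0, imm6 - 8);  // 8-bit elements

    return (-1, 0); // no leading bit: undefined encoding
}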
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs b/src/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs
new file mode 100644
index 00000000..6b1b0ad1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs
@@ -0,0 +1,43 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdShImmLong : OpCode32Simd
+ {
+ public int Shift { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImmLong(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImmLong(inst, address, opCode, true);
+
+ public OpCode32SimdShImmLong(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Q = false;
+ RegisterSize = RegisterSize.Simd64;
+
+ int imm6 = (opCode >> 16) & 0x3f;
+
+ if ((imm6 & 0x20) == 0b100000)
+ {
+ Size = 2;
+ Shift = imm6 - 32;
+ }
+ else if ((imm6 & 0x30) == 0b010000)
+ {
+ Size = 1;
+ Shift = imm6 - 16;
+ }
+ else if ((imm6 & 0x38) == 0b001000)
+ {
+ Size = 0;
+ Shift = imm6 - 8;
+ }
+ else
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+
+ if (GetType() == typeof(OpCode32SimdShImmLong) && DecoderHelper.VectorArgumentsInvalid(true, Vd))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs b/src/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs
new file mode 100644
index 00000000..5351e65f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdShImmNarrow : OpCode32SimdShImm
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImmNarrow(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImmNarrow(inst, address, opCode, true);
+
+ public OpCode32SimdShImmNarrow(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) { }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdSpecial.cs b/src/ARMeilleure/Decoders/OpCode32SimdSpecial.cs
new file mode 100644
index 00000000..61a9f387
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdSpecial.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdSpecial : OpCode32
+ {
+ public int Rt { get; }
+ public int Sreg { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSpecial(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSpecial(inst, address, opCode, true);
+
+ public OpCode32SimdSpecial(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Rt = (opCode >> 12) & 0xf;
+ Sreg = (opCode >> 16) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdSqrte.cs b/src/ARMeilleure/Decoders/OpCode32SimdSqrte.cs
new file mode 100644
index 00000000..5b715535
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdSqrte.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdSqrte : OpCode32Simd
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSqrte(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSqrte(inst, address, opCode, true);
+
+ public OpCode32SimdSqrte(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Size = (opCode >> 18) & 0x1;
+ F = ((opCode >> 8) & 0x1) != 0;
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdTbl.cs b/src/ARMeilleure/Decoders/OpCode32SimdTbl.cs
new file mode 100644
index 00000000..c4fb4b9c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdTbl.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdTbl : OpCode32SimdReg
+ {
+ public int Length { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdTbl(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdTbl(inst, address, opCode, true);
+
+ public OpCode32SimdTbl(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Length = (opCode >> 8) & 3;
+ Size = 0;
+ Opc = Q ? 1 : 0;
+ Q = false;
+ RegisterSize = RegisterSize.Simd64;
+
+ if (Vn + Length + 1 > 32)
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32System.cs b/src/ARMeilleure/Decoders/OpCode32System.cs
new file mode 100644
index 00000000..89e93349
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32System.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32System : OpCode32
+ {
+ public int Opc1 { get; }
+ public int CRn { get; }
+ public int Rt { get; }
+ public int Opc2 { get; }
+ public int CRm { get; }
+ public int MrrcOp { get; }
+
+ public int Coproc { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32System(inst, address, opCode);
+
+ public OpCode32System(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Opc1 = (opCode >> 21) & 0x7;
+ CRn = (opCode >> 16) & 0xf;
+ Rt = (opCode >> 12) & 0xf;
+ Opc2 = (opCode >> 5) & 0x7;
+ CRm = (opCode >> 0) & 0xf;
+ MrrcOp = (opCode >> 4) & 0xf;
+
+ Coproc = (opCode >> 8) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeAdr.cs b/src/ARMeilleure/Decoders/OpCodeAdr.cs
new file mode 100644
index 00000000..9655c766
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeAdr.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeAdr : OpCode
+ {
+ public int Rd { get; }
+
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAdr(inst, address, opCode);
+
+ public OpCodeAdr(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = opCode & 0x1f;
+
+ Immediate = DecoderHelper.DecodeImmS19_2(opCode);
+ Immediate |= ((long)opCode >> 29) & 3;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeAlu.cs b/src/ARMeilleure/Decoders/OpCodeAlu.cs
new file mode 100644
index 00000000..4d7f03a7
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeAlu.cs
@@ -0,0 +1,23 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeAlu : OpCode, IOpCodeAlu
+ {
+ public int Rd { get; protected set; }
+ public int Rn { get; }
+
+ public DataOp DataOp { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAlu(inst, address, opCode);
+
+ public OpCodeAlu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x1f;
+ Rn = (opCode >> 5) & 0x1f;
+ DataOp = (DataOp)((opCode >> 24) & 0x3);
+
+ RegisterSize = (opCode >> 31) != 0
+ ? RegisterSize.Int64
+ : RegisterSize.Int32;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeAluBinary.cs b/src/ARMeilleure/Decoders/OpCodeAluBinary.cs
new file mode 100644
index 00000000..e8b10656
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeAluBinary.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeAluBinary : OpCodeAlu
+ {
+ public int Rm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAluBinary(inst, address, opCode);
+
+ public OpCodeAluBinary(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 16) & 0x1f;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeAluImm.cs b/src/ARMeilleure/Decoders/OpCodeAluImm.cs
new file mode 100644
index 00000000..91aa9553
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeAluImm.cs
@@ -0,0 +1,40 @@
+using System;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeAluImm : OpCodeAlu, IOpCodeAluImm
+ {
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAluImm(inst, address, opCode);
+
+ public OpCodeAluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ if (DataOp == DataOp.Arithmetic)
+ {
+ Immediate = (opCode >> 10) & 0xfff;
+
+ int shift = (opCode >> 22) & 3;
+
+ Immediate <<= shift * 12;
+ }
+ else if (DataOp == DataOp.Logical)
+ {
+ var bm = DecoderHelper.DecodeBitMask(opCode, true);
+
+ if (bm.IsUndefined)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Immediate = bm.WMask;
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid data operation: {DataOp}", nameof(opCode));
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeAluRs.cs b/src/ARMeilleure/Decoders/OpCodeAluRs.cs
new file mode 100644
index 00000000..94983336
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeAluRs.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeAluRs : OpCodeAlu, IOpCodeAluRs
+ {
+ public int Shift { get; }
+ public int Rm { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAluRs(inst, address, opCode);
+
+ public OpCodeAluRs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int shift = (opCode >> 10) & 0x3f;
+
+ if (shift >= GetBitsCount())
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Shift = shift;
+
+ Rm = (opCode >> 16) & 0x1f;
+ ShiftType = (ShiftType)((opCode >> 22) & 0x3);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeAluRx.cs b/src/ARMeilleure/Decoders/OpCodeAluRx.cs
new file mode 100644
index 00000000..d39da9e7
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeAluRx.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeAluRx : OpCodeAlu, IOpCodeAluRx
+ {
+ public int Shift { get; }
+ public int Rm { get; }
+
+ public IntType IntType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAluRx(inst, address, opCode);
+
+ public OpCodeAluRx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Shift = (opCode >> 10) & 0x7;
+ IntType = (IntType)((opCode >> 13) & 0x7);
+ Rm = (opCode >> 16) & 0x1f;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeBImm.cs b/src/ARMeilleure/Decoders/OpCodeBImm.cs
new file mode 100644
index 00000000..e302516e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBImm.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBImm : OpCode, IOpCodeBImm
+ {
+ public long Immediate { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImm(inst, address, opCode);
+
+ public OpCodeBImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeBImmAl.cs b/src/ARMeilleure/Decoders/OpCodeBImmAl.cs
new file mode 100644
index 00000000..47ae5f56
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBImmAl.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBImmAl : OpCodeBImm
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImmAl(inst, address, opCode);
+
+ public OpCodeBImmAl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Immediate = (long)address + DecoderHelper.DecodeImm26_2(opCode);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeBImmCmp.cs b/src/ARMeilleure/Decoders/OpCodeBImmCmp.cs
new file mode 100644
index 00000000..a5246569
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBImmCmp.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBImmCmp : OpCodeBImm
+ {
+ public int Rt { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImmCmp(inst, address, opCode);
+
+ public OpCodeBImmCmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = opCode & 0x1f;
+
+ Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode);
+
+ RegisterSize = (opCode >> 31) != 0
+ ? RegisterSize.Int64
+ : RegisterSize.Int32;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeBImmCond.cs b/src/ARMeilleure/Decoders/OpCodeBImmCond.cs
new file mode 100644
index 00000000..b57a7ea8
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBImmCond.cs
@@ -0,0 +1,25 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBImmCond : OpCodeBImm, IOpCodeCond
+ {
+ public Condition Cond { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImmCond(inst, address, opCode);
+
+ public OpCodeBImmCond(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int o0 = (opCode >> 4) & 1;
+
+ if (o0 != 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Cond = (Condition)(opCode & 0xf);
+
+ Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeBImmTest.cs b/src/ARMeilleure/Decoders/OpCodeBImmTest.cs
new file mode 100644
index 00000000..bad98405
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBImmTest.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBImmTest : OpCodeBImm
+ {
+ public int Rt { get; }
+ public int Bit { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImmTest(inst, address, opCode);
+
+ public OpCodeBImmTest(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = opCode & 0x1f;
+
+ Immediate = (long)address + DecoderHelper.DecodeImmS14_2(opCode);
+
+ Bit = (opCode >> 19) & 0x1f;
+ Bit |= (opCode >> 26) & 0x20;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeBReg.cs b/src/ARMeilleure/Decoders/OpCodeBReg.cs
new file mode 100644
index 00000000..b5dcbfd8
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBReg.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBReg : OpCode
+ {
+ public int Rn { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBReg(inst, address, opCode);
+
+ public OpCodeBReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int op4 = (opCode >> 0) & 0x1f;
+ int op2 = (opCode >> 16) & 0x1f;
+
+ if (op2 != 0b11111 || op4 != 0b00000)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Rn = (opCode >> 5) & 0x1f;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeBfm.cs b/src/ARMeilleure/Decoders/OpCodeBfm.cs
new file mode 100644
index 00000000..8e1c7836
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBfm.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBfm : OpCodeAlu
+ {
+ public long WMask { get; }
+ public long TMask { get; }
+ public int Pos { get; }
+ public int Shift { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBfm(inst, address, opCode);
+
+ public OpCodeBfm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ var bm = DecoderHelper.DecodeBitMask(opCode, false);
+
+ if (bm.IsUndefined)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ WMask = bm.WMask;
+ TMask = bm.TMask;
+ Pos = bm.Pos;
+ Shift = bm.Shift;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeCcmp.cs b/src/ARMeilleure/Decoders/OpCodeCcmp.cs
new file mode 100644
index 00000000..aa47146f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeCcmp.cs
@@ -0,0 +1,32 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeCcmp : OpCodeAlu, IOpCodeCond
+ {
+ public int Nzcv { get; }
+ protected int RmImm;
+
+ public Condition Cond { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeCcmp(inst, address, opCode);
+
+ public OpCodeCcmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int o3 = (opCode >> 4) & 1;
+
+ if (o3 != 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Nzcv = (opCode >> 0) & 0xf;
+ Cond = (Condition)((opCode >> 12) & 0xf);
+ RmImm = (opCode >> 16) & 0x1f;
+
+ Rd = RegisterAlias.Zr;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeCcmpImm.cs b/src/ARMeilleure/Decoders/OpCodeCcmpImm.cs
new file mode 100644
index 00000000..3548f2da
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeCcmpImm.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeCcmpImm : OpCodeCcmp, IOpCodeAluImm
+ {
+ public long Immediate => RmImm;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeCcmpImm(inst, address, opCode);
+
+ public OpCodeCcmpImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeCcmpReg.cs b/src/ARMeilleure/Decoders/OpCodeCcmpReg.cs
new file mode 100644
index 00000000..d5df3b10
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeCcmpReg.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeCcmpReg : OpCodeCcmp, IOpCodeAluRs
+ {
+ public int Rm => RmImm;
+
+ public int Shift => 0;
+
+ public ShiftType ShiftType => ShiftType.Lsl;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeCcmpReg(inst, address, opCode);
+
+ public OpCodeCcmpReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeCsel.cs b/src/ARMeilleure/Decoders/OpCodeCsel.cs
new file mode 100644
index 00000000..4b8dc7fd
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeCsel.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeCsel : OpCodeAlu, IOpCodeCond
+ {
+ public int Rm { get; }
+
+ public Condition Cond { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeCsel(inst, address, opCode);
+
+ public OpCodeCsel(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 16) & 0x1f;
+ Cond = (Condition)((opCode >> 12) & 0xf);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeException.cs b/src/ARMeilleure/Decoders/OpCodeException.cs
new file mode 100644
index 00000000..6b72138e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeException.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeException : OpCode
+ {
+ public int Id { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeException(inst, address, opCode);
+
+ public OpCodeException(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Id = (opCode >> 5) & 0xffff;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeMem.cs b/src/ARMeilleure/Decoders/OpCodeMem.cs
new file mode 100644
index 00000000..0ba2bcd1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMem.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMem : OpCode
+ {
+ public int Rt { get; protected set; }
+ public int Rn { get; protected set; }
+ public int Size { get; protected set; }
+ public bool Extend64 { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMem(inst, address, opCode);
+
+ public OpCodeMem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 0) & 0x1f;
+ Rn = (opCode >> 5) & 0x1f;
+ Size = (opCode >> 30) & 0x3;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeMemEx.cs b/src/ARMeilleure/Decoders/OpCodeMemEx.cs
new file mode 100644
index 00000000..89902485
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMemEx.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMemEx : OpCodeMem
+ {
+ public int Rt2 { get; }
+ public int Rs { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemEx(inst, address, opCode);
+
+ public OpCodeMemEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt2 = (opCode >> 10) & 0x1f;
+ Rs = (opCode >> 16) & 0x1f;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeMemImm.cs b/src/ARMeilleure/Decoders/OpCodeMemImm.cs
new file mode 100644
index 00000000..d6ed2282
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMemImm.cs
@@ -0,0 +1,53 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMemImm : OpCodeMem
+ {
+ public long Immediate { get; protected set; }
+ public bool WBack { get; protected set; }
+ public bool PostIdx { get; protected set; }
+ protected bool Unscaled { get; }
+
+ private enum MemOp
+ {
+ Unscaled = 0,
+ PostIndexed = 1,
+ Unprivileged = 2,
+ PreIndexed = 3,
+ Unsigned
+ }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemImm(inst, address, opCode);
+
+ public OpCodeMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Extend64 = ((opCode >> 22) & 3) == 2;
+ WBack = ((opCode >> 24) & 1) == 0;
+
+ // The type field is not valid for the unsigned 12-bit immediate encoding,
+ // because bits 11:10 are used for the larger immediate offset.
+ MemOp type = WBack ? (MemOp)((opCode >> 10) & 3) : MemOp.Unsigned;
+
+ PostIdx = type == MemOp.PostIndexed;
+ Unscaled = type == MemOp.Unscaled ||
+ type == MemOp.Unprivileged;
+
+ // Unscaled and Unprivileged don't write back,
+ // but they do use the 9-bit signed immediate.
+ if (Unscaled)
+ {
+ WBack = false;
+ }
+
+ if (WBack || Unscaled)
+ {
+ // 9-bit Signed Immediate.
+ Immediate = (opCode << 11) >> 23;
+ }
+ else
+ {
+ // 12-bit Unsigned Immediate.
+ Immediate = ((opCode >> 10) & 0xfff) << Size;
+ }
+ }
+ }
+} \ No newline at end of file
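Aside: the expression (opCode << 11) >> 23 above is a shift-based sign extension: imm9 lives in bits 20:12, the left shift by 11 parks its sign bit (bit 20) at bit 31, and the arithmetic right shift by 23 extends the sign while moving the field down to bit 0. A worked example (the concrete encoding is an assumption for illustration):

// LDUR X0, [SP, #-4]: imm9 = -4 (0x1FC) sits in bits 20:12.
int opCode = unchecked((int)0xF85FC3E0);

long immediate = (opCode << 11) >> 23; // == -4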
diff --git a/src/ARMeilleure/Decoders/OpCodeMemLit.cs b/src/ARMeilleure/Decoders/OpCodeMemLit.cs
new file mode 100644
index 00000000..986d6634
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMemLit.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMemLit : OpCode, IOpCodeLit
+ {
+ public int Rt { get; }
+ public long Immediate { get; }
+ public int Size { get; }
+ public bool Signed { get; }
+ public bool Prefetch { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemLit(inst, address, opCode);
+
+ public OpCodeMemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = opCode & 0x1f;
+
+ Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode);
+
+ switch ((opCode >> 30) & 3)
+ {
+ case 0: Size = 2; Signed = false; Prefetch = false; break;
+ case 1: Size = 3; Signed = false; Prefetch = false; break;
+ case 2: Size = 2; Signed = true; Prefetch = false; break;
+ case 3: Size = 0; Signed = false; Prefetch = true; break;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeMemPair.cs b/src/ARMeilleure/Decoders/OpCodeMemPair.cs
new file mode 100644
index 00000000..21018033
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMemPair.cs
@@ -0,0 +1,25 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMemPair : OpCodeMemImm
+ {
+ public int Rt2 { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemPair(inst, address, opCode);
+
+ public OpCodeMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt2 = (opCode >> 10) & 0x1f;
+ WBack = ((opCode >> 23) & 0x1) != 0;
+ PostIdx = ((opCode >> 23) & 0x3) == 1;
+ Extend64 = ((opCode >> 30) & 0x3) == 1;
+ Size = ((opCode >> 31) & 0x1) | 2;
+
+ DecodeImm(opCode);
+ }
+
+ protected void DecodeImm(int opCode)
+ {
+ Immediate = ((long)(opCode >> 15) << 57) >> (57 - Size);
+ }
+ }
+} \ No newline at end of file
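Aside: DecodeImm above computes SignExtend(imm7) << Size in a single expression: opCode >> 15 brings bits 21:15 down, << 57 parks the sign bit at bit 63, and >> (57 - Size) sign-extends and rescales at once. An equivalent spelled-out version (sketch only):

static long DecodePairImm(int opCode, int size)
{
    long imm7 = (opCode >> 15) & 0x7f; // bits 21:15
    imm7 = (imm7 << 57) >> 57;         // sign-extend from 7 bits
    return imm7 << size;               // scale by the access size
}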
diff --git a/src/ARMeilleure/Decoders/OpCodeMemReg.cs b/src/ARMeilleure/Decoders/OpCodeMemReg.cs
new file mode 100644
index 00000000..73d6c5d2
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMemReg.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMemReg : OpCodeMem
+ {
+ public bool Shift { get; }
+ public int Rm { get; }
+
+ public IntType IntType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemReg(inst, address, opCode);
+
+ public OpCodeMemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Shift = ((opCode >> 12) & 0x1) != 0;
+ IntType = (IntType)((opCode >> 13) & 0x7);
+ Rm = (opCode >> 16) & 0x1f;
+ Extend64 = ((opCode >> 22) & 0x3) == 2;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeMov.cs b/src/ARMeilleure/Decoders/OpCodeMov.cs
new file mode 100644
index 00000000..50af88cb
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMov.cs
@@ -0,0 +1,38 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMov : OpCode
+ {
+ public int Rd { get; }
+
+ public long Immediate { get; }
+
+ public int Bit { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMov(inst, address, opCode);
+
+ public OpCodeMov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int p1 = (opCode >> 22) & 1;
+ int sf = (opCode >> 31) & 1;
+
+ if (sf == 0 && p1 != 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Rd = (opCode >> 0) & 0x1f;
+ Immediate = (opCode >> 5) & 0xffff;
+ Bit = (opCode >> 21) & 0x3;
+
+ Bit <<= 4;
+
+ Immediate <<= Bit;
+
+ RegisterSize = (opCode >> 31) != 0
+ ? RegisterSize.Int64
+ : RegisterSize.Int32;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeMul.cs b/src/ARMeilleure/Decoders/OpCodeMul.cs
new file mode 100644
index 00000000..31d140a6
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMul.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMul : OpCodeAlu
+ {
+ public int Rm { get; }
+ public int Ra { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMul(inst, address, opCode);
+
+ public OpCodeMul(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Ra = (opCode >> 10) & 0x1f;
+ Rm = (opCode >> 16) & 0x1f;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimd.cs b/src/ARMeilleure/Decoders/OpCodeSimd.cs
new file mode 100644
index 00000000..85713690
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimd.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimd : OpCode, IOpCodeSimd
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+ public int Opc { get; }
+ public int Size { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimd(inst, address, opCode);
+
+ public OpCodeSimd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x1f;
+ Rn = (opCode >> 5) & 0x1f;
+ Opc = (opCode >> 15) & 0x3;
+ Size = (opCode >> 22) & 0x3;
+
+ RegisterSize = ((opCode >> 30) & 1) != 0
+ ? RegisterSize.Simd128
+ : RegisterSize.Simd64;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdCvt.cs b/src/ARMeilleure/Decoders/OpCodeSimdCvt.cs
new file mode 100644
index 00000000..05b32941
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdCvt.cs
@@ -0,0 +1,21 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdCvt : OpCodeSimd
+ {
+ public int FBits { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdCvt(inst, address, opCode);
+
+ public OpCodeSimdCvt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int scale = (opCode >> 10) & 0x3f;
+ int sf = (opCode >> 31) & 0x1;
+
+ FBits = 64 - scale;
+
+ RegisterSize = sf != 0
+ ? RegisterSize.Int64
+ : RegisterSize.Int32;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdExt.cs b/src/ARMeilleure/Decoders/OpCodeSimdExt.cs
new file mode 100644
index 00000000..a0e264d9
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdExt.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdExt : OpCodeSimdReg
+ {
+ public int Imm4 { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdExt(inst, address, opCode);
+
+ public OpCodeSimdExt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Imm4 = (opCode >> 11) & 0xf;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdFcond.cs b/src/ARMeilleure/Decoders/OpCodeSimdFcond.cs
new file mode 100644
index 00000000..aa16e0c1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdFcond.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdFcond : OpCodeSimdReg, IOpCodeCond
+ {
+ public int Nzcv { get; }
+
+ public Condition Cond { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdFcond(inst, address, opCode);
+
+ public OpCodeSimdFcond(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Nzcv = (opCode >> 0) & 0xf;
+ Cond = (Condition)((opCode >> 12) & 0xf);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdFmov.cs b/src/ARMeilleure/Decoders/OpCodeSimdFmov.cs
new file mode 100644
index 00000000..9f9062b8
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdFmov.cs
@@ -0,0 +1,32 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdFmov : OpCode, IOpCodeSimd
+ {
+ public int Rd { get; }
+ public long Immediate { get; }
+ public int Size { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdFmov(inst, address, opCode);
+
+ public OpCodeSimdFmov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int type = (opCode >> 22) & 0x3;
+
+ Size = type;
+
+ long imm;
+
+ Rd = (opCode >> 0) & 0x1f;
+ imm = (opCode >> 13) & 0xff;
+
+ if (type == 0)
+ {
+ Immediate = (long)DecoderHelper.Imm8ToFP32Table[(int)imm];
+ }
+ else /* if (type == 1) */
+ {
+ Immediate = (long)DecoderHelper.Imm8ToFP64Table[(int)imm];
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdHelper.cs b/src/ARMeilleure/Decoders/OpCodeSimdHelper.cs
new file mode 100644
index 00000000..02f74d03
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdHelper.cs
@@ -0,0 +1,88 @@
+namespace ARMeilleure.Decoders
+{
+ public static class OpCodeSimdHelper
+ {
+ public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm)
+ {
+ int modeLow = cMode & 1;
+ int modeHigh = cMode >> 1;
+ int size = 0;
+
+ if (modeHigh == 0b111)
+ {
+ switch (op | (modeLow << 1))
+ {
+ case 0:
+ // 64-bit Immediate.
+ // Transform abcd efgh into abcd efgh abcd efgh ...
+ size = 3;
+ imm = (long)((ulong)imm * 0x0101010101010101);
+ break;
+
+ case 1:
+ // 64-bit Immediate.
+ // Transform abcd efgh into aaaa aaaa bbbb bbbb ...
+ size = 3;
+ imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4;
+ imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2;
+ imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1;
+
+ imm = (long)((ulong)imm * 0x8040201008040201);
+ imm = (long)((ulong)imm & 0x8080808080808080);
+
+ imm |= imm >> 4;
+ imm |= imm >> 2;
+ imm |= imm >> 1;
+ break;
+
+ case 2:
+ // 2 x 32-bit floating point Immediate.
+ size = 3;
+ imm = (long)DecoderHelper.Imm8ToFP32Table[(int)imm];
+ imm |= imm << 32;
+ break;
+
+ case 3:
+ // 64-bit floating point Immediate.
+ size = 3;
+ imm = (long)DecoderHelper.Imm8ToFP64Table[(int)imm];
+ break;
+ }
+ }
+ else if ((modeHigh & 0b110) == 0b100)
+ {
+ // 16-bit shifted Immediate.
+ size = 1; imm <<= (modeHigh & 1) << 3;
+ }
+ else if ((modeHigh & 0b100) == 0b000)
+ {
+ // 32-bit shifted Immediate.
+ size = 2; imm <<= modeHigh << 3;
+ }
+ else if ((modeHigh & 0b111) == 0b110)
+ {
+ // 32-bit shifted Immediate (fill with ones).
+ size = 2; imm = ShlOnes(imm, 8 << modeLow);
+ }
+ else
+ {
+ // 8-bit Immediate without shift.
+ size = 0;
+ }
+
+ return (imm, size);
+ }
+
+ private static long ShlOnes(long value, int shift)
+ {
+ if (shift != 0)
+ {
+ return value << shift | (long)(ulong.MaxValue >> (64 - shift));
+ }
+ else
+ {
+ return value;
+ }
+ }
+ }
+}
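Aside: case 1 above broadcasts each bit of imm8 into its own byte with a multiply. The byte is first bit-reversed so bit i lands in byte i; multiplying by 0x8040201008040201 places the copies 9 bits apart (so they never overlap or carry), the 0x8080808080808080 mask keeps one bit per byte, and the final ORs smear that bit across its byte. A worked run (the input value is illustrative):

ulong imm = 0b11000001;

imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4;
imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2;
imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1; // byte is now bit-reversed: 0x83

imm *= 0x8040201008040201; // one copy of each bit per byte, no carries
imm &= 0x8080808080808080; // keep each byte's most significant bit

imm |= imm >> 4; // smear that bit across the byte
imm |= imm >> 2;
imm |= imm >> 1;

// imm == 0xFFFF0000000000FF: bits 7, 6 and 0 of the input became 0xFF bytes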
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdImm.cs b/src/ARMeilleure/Decoders/OpCodeSimdImm.cs
new file mode 100644
index 00000000..eeca7709
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdImm.cs
@@ -0,0 +1,107 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdImm : OpCode, IOpCodeSimd
+ {
+ public int Rd { get; }
+ public long Immediate { get; }
+ public int Size { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdImm(inst, address, opCode);
+
+ public OpCodeSimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = opCode & 0x1f;
+
+ int cMode = (opCode >> 12) & 0xf;
+ int op = (opCode >> 29) & 0x1;
+
+ int modeLow = cMode & 1;
+ int modeHigh = cMode >> 1;
+
+ long imm;
+
+ imm = ((uint)opCode >> 5) & 0x1f;
+ imm |= ((uint)opCode >> 11) & 0xe0;
+
+ if (modeHigh == 0b111)
+ {
+ switch (op | (modeLow << 1))
+ {
+ case 0:
+ // 64-bit Immediate.
+ // Transform abcd efgh into abcd efgh abcd efgh ...
+ Size = 3;
+ imm = (long)((ulong)imm * 0x0101010101010101);
+ break;
+
+ case 1:
+ // 64-bit Immediate.
+ // Transform abcd efgh into aaaa aaaa bbbb bbbb ...
+ Size = 3;
+ imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4;
+ imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2;
+ imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1;
+
+ imm = (long)((ulong)imm * 0x8040201008040201);
+ imm = (long)((ulong)imm & 0x8080808080808080);
+
+ imm |= imm >> 4;
+ imm |= imm >> 2;
+ imm |= imm >> 1;
+ break;
+
+ case 2:
+ // 2 x 32-bit floating point Immediate.
+ Size = 0;
+ imm = (long)DecoderHelper.Imm8ToFP32Table[(int)imm];
+ imm |= imm << 32;
+ break;
+
+ case 3:
+ // 64-bit floating point Immediate.
+ Size = 1;
+ imm = (long)DecoderHelper.Imm8ToFP64Table[(int)imm];
+ break;
+ }
+ }
+ else if ((modeHigh & 0b110) == 0b100)
+ {
+ // 16-bit shifted Immediate.
+ Size = 1; imm <<= (modeHigh & 1) << 3;
+ }
+ else if ((modeHigh & 0b100) == 0b000)
+ {
+ // 32-bit shifted Immediate.
+ Size = 2; imm <<= modeHigh << 3;
+ }
+ else if ((modeHigh & 0b111) == 0b110)
+ {
+ // 32-bit shifted Immediate (fill with ones).
+ Size = 2; imm = ShlOnes(imm, 8 << modeLow);
+ }
+ else
+ {
+ // 8-bit Immediate without shift.
+ Size = 0;
+ }
+
+ Immediate = imm;
+
+ RegisterSize = ((opCode >> 30) & 1) != 0
+ ? RegisterSize.Simd128
+ : RegisterSize.Simd64;
+ }
+
+ private static long ShlOnes(long value, int shift)
+ {
+ if (shift != 0)
+ {
+ return value << shift | (long)(ulong.MaxValue >> (64 - shift));
+ }
+ else
+ {
+ return value;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdIns.cs b/src/ARMeilleure/Decoders/OpCodeSimdIns.cs
new file mode 100644
index 00000000..f6f9249d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdIns.cs
@@ -0,0 +1,36 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdIns : OpCodeSimd
+ {
+ public int SrcIndex { get; }
+ public int DstIndex { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdIns(inst, address, opCode);
+
+ public OpCodeSimdIns(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int imm4 = (opCode >> 11) & 0xf;
+ int imm5 = (opCode >> 16) & 0x1f;
+
+ if (imm5 == 0b10000)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Size = imm5 & -imm5;
+
+ switch (Size)
+ {
+ case 1: Size = 0; break;
+ case 2: Size = 1; break;
+ case 4: Size = 2; break;
+ case 8: Size = 3; break;
+ }
+
+ SrcIndex = imm4 >> Size;
+ DstIndex = imm5 >> (Size + 1);
+ }
+ }
+} \ No newline at end of file
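Aside: imm5 & -imm5 above isolates the lowest set bit of imm5, which is how the element size is encoded (xxxx1 = byte, xxx10 = half, xx100 = word, x1000 = dword); the switch then converts that power of two to its log2. A compact equivalent using .NET's BitOperations (sketch only):

using System.Numerics;

static int ElementSizeLog2(int imm5)
{
    int lowBit = imm5 & -imm5; // 1, 2, 4 or 8
    return BitOperations.TrailingZeroCount(lowBit);
}

// e.g. imm5 = 0b01100 -> size 2 (32-bit element), so DstIndex = imm5 >> 3 = 1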
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemImm.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemImm.cs
new file mode 100644
index 00000000..c11594cb
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdMemImm.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdMemImm : OpCodeMemImm, IOpCodeSimd
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemImm(inst, address, opCode);
+
+ public OpCodeSimdMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Size |= (opCode >> 21) & 4;
+
+ if (Size > 4)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ // The base class already shifts the immediate; we only need to shift it
+ // again when the size (scale) is 4, since that value is only set here.
+ if (!WBack && !Unscaled && Size == 4)
+ {
+ Immediate <<= 4;
+ }
+
+ Extend64 = false;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemLit.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemLit.cs
new file mode 100644
index 00000000..8e212966
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdMemLit.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdMemLit : OpCode, IOpCodeSimd, IOpCodeLit
+ {
+ public int Rt { get; }
+ public long Immediate { get; }
+ public int Size { get; }
+ public bool Signed => false;
+ public bool Prefetch => false;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemLit(inst, address, opCode);
+
+ public OpCodeSimdMemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int opc = (opCode >> 30) & 3;
+
+ if (opc == 3)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Rt = opCode & 0x1f;
+
+ Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode);
+
+ Size = opc + 2;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemMs.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemMs.cs
new file mode 100644
index 00000000..8922c18f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdMemMs.cs
@@ -0,0 +1,48 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdMemMs : OpCodeMemReg, IOpCodeSimd
+ {
+ public int Reps { get; }
+ public int SElems { get; }
+ public int Elems { get; }
+ public bool WBack { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemMs(inst, address, opCode);
+
+ public OpCodeSimdMemMs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ switch ((opCode >> 12) & 0xf)
+ {
+ case 0b0000: Reps = 1; SElems = 4; break;
+ case 0b0010: Reps = 4; SElems = 1; break;
+ case 0b0100: Reps = 1; SElems = 3; break;
+ case 0b0110: Reps = 3; SElems = 1; break;
+ case 0b0111: Reps = 1; SElems = 1; break;
+ case 0b1000: Reps = 1; SElems = 2; break;
+ case 0b1010: Reps = 2; SElems = 1; break;
+
+ default: Instruction = InstDescriptor.Undefined; return;
+ }
+
+ Size = (opCode >> 10) & 3;
+ WBack = ((opCode >> 23) & 1) != 0;
+
+ bool q = ((opCode >> 30) & 1) != 0;
+
+ if (!q && Size == 3 && SElems != 1)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Extend64 = false;
+
+ RegisterSize = q
+ ? RegisterSize.Simd128
+ : RegisterSize.Simd64;
+
+ Elems = (GetBitsCount() >> 3) >> Size;
+ }
+ }
+} \ No newline at end of file
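Aside: Elems above is the per-register element count: GetBitsCount() >> 3 is the register width in bytes, and the further >> Size divides by the element width. For example (the instruction choice is illustrative):

// LD4 { V0.8H - V3.8H }: q = 1 (128-bit registers), Size = 1 (16-bit elements),
// and the 0b0000 opcode row above gives Reps = 1, SElems = 4.
int regBytes = 128 >> 3;   // 16 bytes per register
int elems = regBytes >> 1; // 8 elements per register
// The instruction therefore moves Reps * SElems * elems = 1 * 4 * 8 = 32 elements.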
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemPair.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemPair.cs
new file mode 100644
index 00000000..1ab95367
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdMemPair.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdMemPair : OpCodeMemPair, IOpCodeSimd
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemPair(inst, address, opCode);
+
+ public OpCodeSimdMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Size = ((opCode >> 30) & 3) + 2;
+
+ Extend64 = false;
+
+ DecodeImm(opCode);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemReg.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemReg.cs
new file mode 100644
index 00000000..9ea6dda3
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdMemReg.cs
@@ -0,0 +1,21 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdMemReg : OpCodeMemReg, IOpCodeSimd
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemReg(inst, address, opCode);
+
+ public OpCodeSimdMemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Size |= (opCode >> 21) & 4;
+
+ if (Size > 4)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Extend64 = false;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemSs.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemSs.cs
new file mode 100644
index 00000000..44abdd38
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdMemSs.cs
@@ -0,0 +1,97 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdMemSs : OpCodeMemReg, IOpCodeSimd
+ {
+ public int SElems { get; }
+ public int Index { get; }
+ public bool Replicate { get; }
+ public bool WBack { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemSs(inst, address, opCode);
+
+ public OpCodeSimdMemSs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int size = (opCode >> 10) & 3;
+ int s = (opCode >> 12) & 1;
+ int sElems = (opCode >> 12) & 2;
+ int scale = (opCode >> 14) & 3;
+ int l = (opCode >> 22) & 1;
+ int q = (opCode >> 30) & 1;
+
+ sElems |= (opCode >> 21) & 1;
+
+ sElems++;
+
+ int index = (q << 3) | (s << 2) | size;
+
+ switch (scale)
+ {
+ case 1:
+ {
+ if ((size & 1) != 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ index >>= 1;
+
+ break;
+ }
+
+ case 2:
+ {
+ if ((size & 2) != 0 ||
+ ((size & 1) != 0 && s != 0))
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ if ((size & 1) != 0)
+ {
+ index >>= 3;
+
+ scale = 3;
+ }
+ else
+ {
+ index >>= 2;
+ }
+
+ break;
+ }
+
+ case 3:
+ {
+ if (l == 0 || s != 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ scale = size;
+
+ Replicate = true;
+
+ break;
+ }
+ }
+
+ Index = index;
+ SElems = sElems;
+ Size = scale;
+
+ Extend64 = false;
+
+ WBack = ((opCode >> 23) & 1) != 0;
+
+ RegisterSize = q != 0
+ ? RegisterSize.Simd128
+ : RegisterSize.Simd64;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdReg.cs b/src/ARMeilleure/Decoders/OpCodeSimdReg.cs
new file mode 100644
index 00000000..ac4f71da
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdReg.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdReg : OpCodeSimd
+ {
+ public bool Bit3 { get; }
+ public int Ra { get; }
+ public int Rm { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdReg(inst, address, opCode);
+
+ public OpCodeSimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Bit3 = ((opCode >> 3) & 0x1) != 0;
+ Ra = (opCode >> 10) & 0x1f;
+ Rm = (opCode >> 16) & 0x1f;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdRegElem.cs b/src/ARMeilleure/Decoders/OpCodeSimdRegElem.cs
new file mode 100644
index 00000000..92368dee
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdRegElem.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdRegElem : OpCodeSimdReg
+ {
+ public int Index { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdRegElem(inst, address, opCode);
+
+ public OpCodeSimdRegElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ switch (Size)
+ {
+ case 1:
+ Index = (opCode >> 20) & 3 |
+ (opCode >> 9) & 4;
+
+ Rm &= 0xf;
+
+ break;
+
+ case 2:
+ Index = (opCode >> 21) & 1 |
+ (opCode >> 10) & 2;
+
+ break;
+
+ default: Instruction = InstDescriptor.Undefined; break;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs b/src/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs
new file mode 100644
index 00000000..d46dd57e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs
@@ -0,0 +1,33 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdRegElemF : OpCodeSimdReg
+ {
+ public int Index { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdRegElemF(inst, address, opCode);
+
+ public OpCodeSimdRegElemF(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ switch ((opCode >> 21) & 3) // sz:L
+ {
+ case 0: // H:0
+ Index = (opCode >> 10) & 2; // 0, 2
+
+ break;
+
+ case 1: // H:1
+ Index = (opCode >> 10) & 2;
+ Index++; // 1, 3
+
+ break;
+
+ case 2: // H
+ Index = (opCode >> 11) & 1; // 0, 1
+
+ break;
+
+ default: Instruction = InstDescriptor.Undefined; break;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdShImm.cs b/src/ARMeilleure/Decoders/OpCodeSimdShImm.cs
new file mode 100644
index 00000000..7064f1d2
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdShImm.cs
@@ -0,0 +1,18 @@
+using ARMeilleure.Common;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdShImm : OpCodeSimd
+ {
+ public int Imm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdShImm(inst, address, opCode);
+
+ public OpCodeSimdShImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Imm = (opCode >> 16) & 0x7f;
+
+ Size = BitUtils.HighestBitSetNibble(Imm >> 3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdTbl.cs b/src/ARMeilleure/Decoders/OpCodeSimdTbl.cs
new file mode 100644
index 00000000..9c631e48
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdTbl.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdTbl : OpCodeSimdReg
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdTbl(inst, address, opCode);
+
+ public OpCodeSimdTbl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Size = ((opCode >> 13) & 3) + 1;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeSystem.cs b/src/ARMeilleure/Decoders/OpCodeSystem.cs
new file mode 100644
index 00000000..4d79421a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSystem.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSystem : OpCode
+ {
+ public int Rt { get; }
+ public int Op2 { get; }
+ public int CRm { get; }
+ public int CRn { get; }
+ public int Op1 { get; }
+ public int Op0 { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSystem(inst, address, opCode);
+
+ public OpCodeSystem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 0) & 0x1f;
+ Op2 = (opCode >> 5) & 0x7;
+ CRm = (opCode >> 8) & 0xf;
+ CRn = (opCode >> 12) & 0xf;
+ Op1 = (opCode >> 16) & 0x7;
+ Op0 = ((opCode >> 19) & 0x1) | 2;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeT16.cs b/src/ARMeilleure/Decoders/OpCodeT16.cs
new file mode 100644
index 00000000..9c3d6b00
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16 : OpCode32
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16(inst, address, opCode);
+
+ public OpCodeT16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Cond = Condition.Al;
+
+ IsThumb = true;
+ OpCodeSizeInBytes = 2;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AddSubImm3.cs b/src/ARMeilleure/Decoders/OpCodeT16AddSubImm3.cs
new file mode 100644
index 00000000..95f18054
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AddSubImm3.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AddSubImm3 : OpCodeT16, IOpCode32AluImm
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => null;
+
+ public int Immediate { get; }
+
+ public bool IsRotated { get; }
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AddSubImm3(inst, address, opCode);
+
+ public OpCodeT16AddSubImm3(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x7;
+ Rn = (opCode >> 3) & 0x7;
+ Immediate = (opCode >> 6) & 0x7;
+ IsRotated = false;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AddSubReg.cs b/src/ARMeilleure/Decoders/OpCodeT16AddSubReg.cs
new file mode 100644
index 00000000..2a407b2d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AddSubReg.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AddSubReg : OpCodeT16, IOpCode32AluReg
+ {
+ public int Rm { get; }
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => null;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AddSubReg(inst, address, opCode);
+
+ public OpCodeT16AddSubReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x7;
+ Rn = (opCode >> 3) & 0x7;
+ Rm = (opCode >> 6) & 0x7;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AddSubSp.cs b/src/ARMeilleure/Decoders/OpCodeT16AddSubSp.cs
new file mode 100644
index 00000000..b66fe0cd
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AddSubSp.cs
@@ -0,0 +1,23 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AddSubSp : OpCodeT16, IOpCode32AluImm
+ {
+ public int Rd => RegisterAlias.Aarch32Sp;
+ public int Rn => RegisterAlias.Aarch32Sp;
+
+ public bool? SetFlags => false;
+
+ public int Immediate { get; }
+
+ public bool IsRotated => false;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AddSubSp(inst, address, opCode);
+
+ public OpCodeT16AddSubSp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Immediate = ((opCode >> 0) & 0x7f) << 2;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16Adr.cs b/src/ARMeilleure/Decoders/OpCodeT16Adr.cs
new file mode 100644
index 00000000..03abd499
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16Adr.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16Adr : OpCodeT16, IOpCode32Adr
+ {
+ public int Rd { get; }
+
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16Adr(inst, address, opCode);
+
+ public OpCodeT16Adr(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 8) & 7;
+
+ int imm = (opCode & 0xff) << 2;
+ Immediate = (int)(GetPc() & 0xfffffffc) + imm;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluImm8.cs b/src/ARMeilleure/Decoders/OpCodeT16AluImm8.cs
new file mode 100644
index 00000000..673a4604
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AluImm8.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AluImm8 : OpCodeT16, IOpCode32AluImm
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => null;
+
+ public int Immediate { get; }
+
+ public bool IsRotated { get; }
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluImm8(inst, address, opCode);
+
+ public OpCodeT16AluImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 8) & 0x7;
+ Rn = (opCode >> 8) & 0x7;
+ Immediate = (opCode >> 0) & 0xff;
+ IsRotated = false;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluImmZero.cs b/src/ARMeilleure/Decoders/OpCodeT16AluImmZero.cs
new file mode 100644
index 00000000..b23f8fe0
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AluImmZero.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AluImmZero : OpCodeT16, IOpCode32AluImm
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => null;
+
+ public int Immediate { get; }
+
+ public bool IsRotated { get; }
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluImmZero(inst, address, opCode);
+
+ public OpCodeT16AluImmZero(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x7;
+ Rn = (opCode >> 3) & 0x7;
+ Immediate = 0;
+ IsRotated = false;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluRegHigh.cs b/src/ARMeilleure/Decoders/OpCodeT16AluRegHigh.cs
new file mode 100644
index 00000000..6d5ac8fd
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AluRegHigh.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AluRegHigh : OpCodeT16, IOpCode32AluReg
+ {
+ public int Rm { get; }
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => false;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluRegHigh(inst, address, opCode);
+
+ public OpCodeT16AluRegHigh(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = ((opCode >> 0) & 0x7) | ((opCode >> 4) & 0x8);
+ Rn = ((opCode >> 0) & 0x7) | ((opCode >> 4) & 0x8);
+ Rm = (opCode >> 3) & 0xf;
+ }
+ }
+}
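Note: the `((opCode >> 4) & 0x8)` splice above moves the DN bit (bit 7 of the encoding) into bit 3 of the register number, which is how the T16 high-register forms reach r8-r15. A hedged spot check with a hand-assembled ADD r10, r3 (the encoding value below is our own illustration, not taken from this patch):

    int opCode = 0x449A; // 0100 0100 1 0011 010: DN=1, Rm=0011, Rdn=010
    int rd = ((opCode >> 0) & 0x7) | ((opCode >> 4) & 0x8); // 2 | 8 = 10 -> r10
    int rm = (opCode >> 3) & 0xf;                           // 3 -> r3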
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluRegLow.cs b/src/ARMeilleure/Decoders/OpCodeT16AluRegLow.cs
new file mode 100644
index 00000000..b37b4f66
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AluRegLow.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AluRegLow : OpCodeT16, IOpCode32AluReg
+ {
+ public int Rm { get; }
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => null;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluRegLow(inst, address, opCode);
+
+ public OpCodeT16AluRegLow(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x7;
+ Rn = (opCode >> 0) & 0x7;
+ Rm = (opCode >> 3) & 0x7;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluUx.cs b/src/ARMeilleure/Decoders/OpCodeT16AluUx.cs
new file mode 100644
index 00000000..11d3a8fe
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AluUx.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AluUx : OpCodeT16, IOpCode32AluUx
+ {
+ public int Rm { get; }
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => false;
+
+ public int RotateBits => 0;
+ public bool Add => false;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluUx(inst, address, opCode);
+
+ public OpCodeT16AluUx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x7;
+ Rm = (opCode >> 3) & 0x7;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16BImm11.cs b/src/ARMeilleure/Decoders/OpCodeT16BImm11.cs
new file mode 100644
index 00000000..f230b20e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16BImm11.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16BImm11 : OpCodeT16, IOpCode32BImm
+ {
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16BImm11(inst, address, opCode);
+
+ public OpCodeT16BImm11(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int imm = (opCode << 21) >> 20;
+ Immediate = GetPc() + imm;
+ }
+ }
+}
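Note: `(opCode << 21) >> 20` is a sign-extend-and-double in one step: the left shift parks the top of the 11-bit immediate (bit 10) in the sign bit, and the arithmetic right shift by one less brings it back down with the implicit halfword scaling applied. A hedged scalar equivalent (helper name hypothetical):

    // Explicit version of the same decode; returns the signed byte offset.
    static int DecodeT16BImm11(int opCode)
    {
        int imm11 = opCode & 0x7ff;      // bits [10:0]
        int offset = imm11 << 1;         // halfword-aligned
        if ((imm11 & 0x400) != 0)        // bit 10 is the sign bit
        {
            offset |= unchecked((int)0xFFFFF000);
        }
        return offset;                   // equals (opCode << 21) >> 20
    }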
diff --git a/src/ARMeilleure/Decoders/OpCodeT16BImm8.cs b/src/ARMeilleure/Decoders/OpCodeT16BImm8.cs
new file mode 100644
index 00000000..5f684298
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16BImm8.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16BImm8 : OpCodeT16, IOpCode32BImm
+ {
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16BImm8(inst, address, opCode);
+
+ public OpCodeT16BImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Cond = (Condition)((opCode >> 8) & 0xf);
+
+ int imm = (opCode << 24) >> 23;
+ Immediate = GetPc() + imm;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16BImmCmp.cs b/src/ARMeilleure/Decoders/OpCodeT16BImmCmp.cs
new file mode 100644
index 00000000..68ebac75
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16BImmCmp.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16BImmCmp : OpCodeT16, IOpCode32BImm
+ {
+ public int Rn { get; }
+
+ public long Immediate { get; }
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16BImmCmp(inst, address, opCode);
+
+ public OpCodeT16BImmCmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rn = (opCode >> 0) & 0x7;
+
+ int imm = ((opCode >> 2) & 0x3e) | ((opCode >> 3) & 0x40);
+ Immediate = (int)GetPc() + imm;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16BReg.cs b/src/ARMeilleure/Decoders/OpCodeT16BReg.cs
new file mode 100644
index 00000000..3122cd07
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16BReg.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16BReg : OpCodeT16, IOpCode32BReg
+ {
+ public int Rm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16BReg(inst, address, opCode);
+
+ public OpCodeT16BReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 3) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16Exception.cs b/src/ARMeilleure/Decoders/OpCodeT16Exception.cs
new file mode 100644
index 00000000..bb005083
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16Exception.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16Exception : OpCodeT16, IOpCode32Exception
+ {
+ public int Id { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16Exception(inst, address, opCode);
+
+ public OpCodeT16Exception(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Id = opCode & 0xFF;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16IfThen.cs b/src/ARMeilleure/Decoders/OpCodeT16IfThen.cs
new file mode 100644
index 00000000..8c3de689
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16IfThen.cs
@@ -0,0 +1,33 @@
+using System.Collections.Generic;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16IfThen : OpCodeT16
+ {
+ public Condition[] IfThenBlockConds { get; }
+
+ public int IfThenBlockSize { get { return IfThenBlockConds.Length; } }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16IfThen(inst, address, opCode);
+
+ public OpCodeT16IfThen(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ List<Condition> conds = new();
+
+ int cond = (opCode >> 4) & 0xf;
+ int mask = opCode & 0xf;
+
+ conds.Add((Condition)cond);
+
+ while ((mask & 7) != 0)
+ {
+ int newLsb = (mask >> 3) & 1;
+ cond = (cond & 0xe) | newLsb;
+ mask <<= 1;
+ conds.Add((Condition)cond);
+ }
+
+ IfThenBlockConds = conds.ToArray();
+ }
+ }
+}
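Note: the loop above is the ARMv7 IT mask expansion: the 4-bit mask carries one condition bit per remaining instruction, consumed MSB-first, and the trailing 1 acts as a terminator, so the loop stops once the low three bits are clear. A hedged worked example for ITTE EQ (firstcond = 0b0000, mask = 0b0110), which should expand to EQ, EQ, NE:

    // using System.Collections.Generic;
    int cond = 0b0000;                   // EQ
    int mask = 0b0110;                   // T, E, terminator, pad
    var conds = new List<int> { cond };
    while ((mask & 7) != 0)
    {
        cond = (cond & 0xe) | ((mask >> 3) & 1); // next mask bit replaces the LSB
        mask <<= 1;
        conds.Add(cond);
    }
    // conds == { 0, 0, 1 } -> EQ, EQ, NE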
diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemImm5.cs b/src/ARMeilleure/Decoders/OpCodeT16MemImm5.cs
new file mode 100644
index 00000000..20ef31e2
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16MemImm5.cs
@@ -0,0 +1,58 @@
+using ARMeilleure.Instructions;
+using System;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16MemImm5 : OpCodeT16, IOpCode32Mem
+ {
+ public int Rt { get; }
+ public int Rn { get; }
+
+ public bool WBack => false;
+ public bool IsLoad { get; }
+ public bool Index => true;
+ public bool Add => true;
+
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemImm5(inst, address, opCode);
+
+ public OpCodeT16MemImm5(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 0) & 7;
+ Rn = (opCode >> 3) & 7;
+
+ switch (inst.Name)
+ {
+ case InstName.Ldr:
+ case InstName.Ldrb:
+ case InstName.Ldrh:
+ IsLoad = true;
+ break;
+ case InstName.Str:
+ case InstName.Strb:
+ case InstName.Strh:
+ IsLoad = false;
+ break;
+ }
+
+ switch (inst.Name)
+ {
+ case InstName.Str:
+ case InstName.Ldr:
+ Immediate = ((opCode >> 6) & 0x1f) << 2;
+ break;
+ case InstName.Strb:
+ case InstName.Ldrb:
+ Immediate = ((opCode >> 6) & 0x1f);
+ break;
+ case InstName.Strh:
+ case InstName.Ldrh:
+ Immediate = ((opCode >> 6) & 0x1f) << 1;
+ break;
+ default:
+ throw new InvalidOperationException();
+ }
+ }
+ }
+}
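Note: the second switch scales the 5-bit offset field by the access size, so the reachable immediate range differs per instruction. A hedged illustration of the maxima (imm5 = 31):

    int imm5 = 31;
    int maxWord = imm5 << 2; // 124 for LDR/STR
    int maxHalf = imm5 << 1; // 62 for LDRH/STRH
    int maxByte = imm5;      // 31 for LDRB/STRB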
diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemLit.cs b/src/ARMeilleure/Decoders/OpCodeT16MemLit.cs
new file mode 100644
index 00000000..f8c16e29
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16MemLit.cs
@@ -0,0 +1,26 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16MemLit : OpCodeT16, IOpCode32Mem
+ {
+ public int Rt { get; }
+ public int Rn => RegisterAlias.Aarch32Pc;
+
+ public bool WBack => false;
+ public bool IsLoad => true;
+ public bool Index => true;
+ public bool Add => true;
+
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemLit(inst, address, opCode);
+
+ public OpCodeT16MemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 8) & 7;
+
+ Immediate = (opCode & 0xff) << 2;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemMult.cs b/src/ARMeilleure/Decoders/OpCodeT16MemMult.cs
new file mode 100644
index 00000000..f4185cfc
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16MemMult.cs
@@ -0,0 +1,34 @@
+using ARMeilleure.Instructions;
+using System;
+using System.Numerics;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16MemMult : OpCodeT16, IOpCode32MemMult
+ {
+ public int Rn { get; }
+ public int RegisterMask { get; }
+ public int PostOffset { get; }
+ public bool IsLoad { get; }
+ public int Offset { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemMult(inst, address, opCode);
+
+ public OpCodeT16MemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ RegisterMask = opCode & 0xff;
+ Rn = (opCode >> 8) & 7;
+
+ int regCount = BitOperations.PopCount((uint)RegisterMask);
+
+ Offset = 0;
+ PostOffset = 4 * regCount;
+ IsLoad = inst.Name switch
+ {
+ InstName.Ldm => true,
+ InstName.Stm => false,
+ _ => throw new InvalidOperationException()
+ };
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemReg.cs b/src/ARMeilleure/Decoders/OpCodeT16MemReg.cs
new file mode 100644
index 00000000..71100112
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16MemReg.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16MemReg : OpCodeT16, IOpCode32MemReg
+ {
+ public int Rm { get; }
+ public int Rt { get; }
+ public int Rn { get; }
+
+ public bool WBack => false;
+ public bool IsLoad { get; }
+ public bool Index => true;
+ public bool Add => true;
+
+ public int Immediate => throw new System.InvalidOperationException();
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemReg(inst, address, opCode);
+
+ public OpCodeT16MemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 0) & 7;
+ Rn = (opCode >> 3) & 7;
+ Rm = (opCode >> 6) & 7;
+
+ IsLoad = ((opCode >> 9) & 7) >= 3;
+ }
+ }
+}
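Note: `((opCode >> 9) & 7) >= 3` works because bits [11:9] of the T16 load/store (register offset) group select the operation in order STR, STRH, STRB, LDRSB, LDR, LDRH, LDRB, LDRSH, so every value from 3 upward is a load. A hedged restatement:

    // opB = (opCode >> 9) & 7:
    // 0=STR, 1=STRH, 2=STRB, 3=LDRSB, 4=LDR, 5=LDRH, 6=LDRB, 7=LDRSH
    bool isLoad = ((opCode >> 9) & 7) >= 3;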
diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemSp.cs b/src/ARMeilleure/Decoders/OpCodeT16MemSp.cs
new file mode 100644
index 00000000..a038b915
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16MemSp.cs
@@ -0,0 +1,28 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16MemSp : OpCodeT16, IOpCode32Mem
+ {
+ public int Rt { get; }
+ public int Rn => RegisterAlias.Aarch32Sp;
+
+ public bool WBack => false;
+ public bool IsLoad { get; }
+ public bool Index => true;
+ public bool Add => true;
+
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemSp(inst, address, opCode);
+
+ public OpCodeT16MemSp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 8) & 7;
+
+ IsLoad = ((opCode >> 11) & 1) != 0;
+
+ Immediate = ((opCode >> 0) & 0xff) << 2;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemStack.cs b/src/ARMeilleure/Decoders/OpCodeT16MemStack.cs
new file mode 100644
index 00000000..9d7b0d20
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16MemStack.cs
@@ -0,0 +1,42 @@
+using ARMeilleure.Instructions;
+using ARMeilleure.State;
+using System;
+using System.Numerics;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16MemStack : OpCodeT16, IOpCode32MemMult
+ {
+ public int Rn => RegisterAlias.Aarch32Sp;
+ public int RegisterMask { get; }
+ public int PostOffset { get; }
+ public bool IsLoad { get; }
+ public int Offset { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemStack(inst, address, opCode);
+
+ public OpCodeT16MemStack(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int extra = (opCode >> 8) & 1;
+ int regCount = BitOperations.PopCount((uint)opCode & 0x1ff);
+
+ switch (inst.Name)
+ {
+ case InstName.Push:
+ RegisterMask = (opCode & 0xff) | (extra << 14);
+ IsLoad = false;
+ Offset = -4 * regCount;
+ PostOffset = -4 * regCount;
+ break;
+ case InstName.Pop:
+ RegisterMask = (opCode & 0xff) | (extra << 15);
+ IsLoad = true;
+ Offset = 0;
+ PostOffset = 4 * regCount;
+ break;
+ default:
+ throw new InvalidOperationException();
+ }
+ }
+ }
+}
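Note: PUSH and POP take an 8-bit low-register list plus one extra bit, which names LR for PUSH and PC for POP; that is why `extra` is shifted to position 14 or 15. PUSH is a pre-decrement store, POP a post-increment load. A hedged decode of PUSH {r0, r4, lr} (encoding hand-assembled for illustration, not taken from this patch):

    // using System.Numerics;
    int opCode = 0xB511;                         // 1011 0101 0001 0001
    int extra = (opCode >> 8) & 1;               // 1 -> LR in the list
    int mask = (opCode & 0xff) | (extra << 14);  // 0x4011: r0, r4, r14
    int regCount = BitOperations.PopCount((uint)opCode & 0x1ff); // 3
    int offset = -4 * regCount;                  // stores start 12 bytes below SP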
diff --git a/src/ARMeilleure/Decoders/OpCodeT16ShiftImm.cs b/src/ARMeilleure/Decoders/OpCodeT16ShiftImm.cs
new file mode 100644
index 00000000..a540026e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16ShiftImm.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16ShiftImm : OpCodeT16, IOpCode32AluRsImm
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+ public int Rm { get; }
+
+ public int Immediate { get; }
+ public ShiftType ShiftType { get; }
+
+ public bool? SetFlags => null;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16ShiftImm(inst, address, opCode);
+
+ public OpCodeT16ShiftImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x7;
+ Rm = (opCode >> 3) & 0x7;
+ Immediate = (opCode >> 6) & 0x1F;
+ ShiftType = (ShiftType)((opCode >> 11) & 3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16ShiftReg.cs b/src/ARMeilleure/Decoders/OpCodeT16ShiftReg.cs
new file mode 100644
index 00000000..9f898281
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16ShiftReg.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16ShiftReg : OpCodeT16, IOpCode32AluRsReg
+ {
+ public int Rm { get; }
+ public int Rs { get; }
+ public int Rd { get; }
+
+ public int Rn { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public bool? SetFlags => null;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16ShiftReg(inst, address, opCode);
+
+ public OpCodeT16ShiftReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 7;
+ Rm = (opCode >> 0) & 7;
+ Rn = (opCode >> 3) & 7;
+ Rs = (opCode >> 3) & 7;
+
+ ShiftType = (ShiftType)(((opCode >> 6) & 1) | ((opCode >> 7) & 2));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16SpRel.cs b/src/ARMeilleure/Decoders/OpCodeT16SpRel.cs
new file mode 100644
index 00000000..d737f5bd
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16SpRel.cs
@@ -0,0 +1,24 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16SpRel : OpCodeT16, IOpCode32AluImm
+ {
+ public int Rd { get; }
+ public int Rn => RegisterAlias.Aarch32Sp;
+
+ public bool? SetFlags => false;
+
+ public int Immediate { get; }
+
+ public bool IsRotated => false;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16SpRel(inst, address, opCode);
+
+ public OpCodeT16SpRel(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 8) & 0x7;
+ Immediate = ((opCode >> 0) & 0xff) << 2;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32.cs b/src/ARMeilleure/Decoders/OpCodeT32.cs
new file mode 100644
index 00000000..cf43d429
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32 : OpCode32
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32(inst, address, opCode);
+
+ public OpCodeT32(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Cond = Condition.Al;
+
+ IsThumb = true;
+ OpCodeSizeInBytes = 4;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeT32Alu.cs b/src/ARMeilleure/Decoders/OpCodeT32Alu.cs
new file mode 100644
index 00000000..a81b3b3d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32Alu.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32Alu : OpCodeT32, IOpCode32Alu
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32Alu(inst, address, opCode);
+
+ public OpCodeT32Alu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 8) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ SetFlags = ((opCode >> 20) & 1) != 0;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluBf.cs b/src/ARMeilleure/Decoders/OpCodeT32AluBf.cs
new file mode 100644
index 00000000..57ad422f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluBf.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluBf : OpCodeT32, IOpCode32AluBf
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public int Msb { get; }
+ public int Lsb { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluBf(inst, address, opCode);
+
+ public OpCodeT32AluBf(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 8) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ Msb = (opCode >> 0) & 0x1f;
+ Lsb = ((opCode >> 6) & 0x3) | ((opCode >> 10) & 0x1c);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluImm.cs b/src/ARMeilleure/Decoders/OpCodeT32AluImm.cs
new file mode 100644
index 00000000..0895c29b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluImm.cs
@@ -0,0 +1,38 @@
+using ARMeilleure.Common;
+using System.Runtime.Intrinsics;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluImm : OpCodeT32Alu, IOpCode32AluImm
+ {
+ public int Immediate { get; }
+
+ public bool IsRotated { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluImm(inst, address, opCode);
+
+ private static readonly Vector128<int> _factor = Vector128.Create(1, 0x00010001, 0x01000100, 0x01010101);
+
+ public OpCodeT32AluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int imm8 = (opCode >> 0) & 0xff;
+ int imm3 = (opCode >> 12) & 7;
+ int imm1 = (opCode >> 26) & 1;
+
+ int imm12 = imm8 | (imm3 << 8) | (imm1 << 11);
+
+ if ((imm12 >> 10) == 0)
+ {
+ Immediate = imm8 * _factor.GetElement((imm12 >> 8) & 3);
+ IsRotated = false;
+ }
+ else
+ {
+ int shift = imm12 >> 7;
+
+ Immediate = BitUtils.RotateRight(0x80 | (imm12 & 0x7f), shift, 32);
+ IsRotated = shift != 0;
+ }
+ }
+ }
+}
\ No newline at end of file
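Note: `_factor` is a four-entry lookup for the Thumb-2 "modified immediate" replication patterns: when imm12[11:10] is zero, imm12[9:8] selects 0x000000XY, 0x00XY00XY, 0xXY00XY00 or 0xXYXYXYXY, and multiplying imm8 by 1, 0x00010001, 0x01000100 or 0x01010101 produces exactly those values; otherwise the constant is 0x80 | imm12[6:0] rotated right by imm12[11:7]. A hedged scalar sketch of the same expansion (the helper name follows the ARM pseudocode, not this codebase):

    static uint ThumbExpandImm(int imm12)
    {
        int imm8 = imm12 & 0xff;
        if ((imm12 >> 10) == 0)
        {
            int[] factor = { 1, 0x00010001, 0x01000100, 0x01010101 };
            return (uint)(imm8 * factor[(imm12 >> 8) & 3]);   // replication patterns
        }
        int shift = imm12 >> 7;                               // rotation amount, 8..31
        uint value = (uint)(0x80 | (imm12 & 0x7f));
        return (value >> shift) | (value << (32 - shift));    // rotated constant
    }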
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluImm12.cs b/src/ARMeilleure/Decoders/OpCodeT32AluImm12.cs
new file mode 100644
index 00000000..31de63dd
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluImm12.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluImm12 : OpCodeT32Alu, IOpCode32AluImm
+ {
+ public int Immediate { get; }
+
+ public bool IsRotated => false;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluImm12(inst, address, opCode);
+
+ public OpCodeT32AluImm12(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Immediate = (opCode & 0xff) | ((opCode >> 4) & 0x700) | ((opCode >> 15) & 0x800);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluMla.cs b/src/ARMeilleure/Decoders/OpCodeT32AluMla.cs
new file mode 100644
index 00000000..6cb604da
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluMla.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluMla : OpCodeT32, IOpCode32AluMla
+ {
+ public int Rn { get; }
+ public int Rm { get; }
+ public int Ra { get; }
+ public int Rd { get; }
+
+ public bool NHigh { get; }
+ public bool MHigh { get; }
+ public bool R { get; }
+ public bool? SetFlags => false;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluMla(inst, address, opCode);
+
+ public OpCodeT32AluMla(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Rd = (opCode >> 8) & 0xf;
+ Ra = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+ R = (opCode & (1 << 4)) != 0;
+
+ MHigh = ((opCode >> 4) & 0x1) == 1;
+ NHigh = ((opCode >> 5) & 0x1) == 1;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluReg.cs b/src/ARMeilleure/Decoders/OpCodeT32AluReg.cs
new file mode 100644
index 00000000..a487f55a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluReg.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluReg : OpCodeT32Alu, IOpCode32AluReg
+ {
+ public int Rm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluReg(inst, address, opCode);
+
+ public OpCodeT32AluReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluRsImm.cs b/src/ARMeilleure/Decoders/OpCodeT32AluRsImm.cs
new file mode 100644
index 00000000..1c9ba7a2
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluRsImm.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluRsImm : OpCodeT32Alu, IOpCode32AluRsImm
+ {
+ public int Rm { get; }
+ public int Immediate { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluRsImm(inst, address, opCode);
+
+ public OpCodeT32AluRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Immediate = ((opCode >> 6) & 3) | ((opCode >> 10) & 0x1c);
+
+ ShiftType = (ShiftType)((opCode >> 4) & 3);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluUmull.cs b/src/ARMeilleure/Decoders/OpCodeT32AluUmull.cs
new file mode 100644
index 00000000..a1b2e612
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluUmull.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluUmull : OpCodeT32, IOpCode32AluUmull
+ {
+ public int RdLo { get; }
+ public int RdHi { get; }
+ public int Rn { get; }
+ public int Rm { get; }
+
+ public bool NHigh { get; }
+ public bool MHigh { get; }
+
+ public bool? SetFlags => false;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluUmull(inst, address, opCode);
+
+ public OpCodeT32AluUmull(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ RdHi = (opCode >> 8) & 0xf;
+ RdLo = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ MHigh = ((opCode >> 4) & 0x1) == 1;
+ NHigh = ((opCode >> 5) & 0x1) == 1;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluUx.cs b/src/ARMeilleure/Decoders/OpCodeT32AluUx.cs
new file mode 100644
index 00000000..861dc904
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluUx.cs
@@ -0,0 +1,18 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluUx : OpCodeT32AluReg, IOpCode32AluUx
+ {
+ public int Rotate { get; }
+ public int RotateBits => Rotate * 8;
+ public bool Add => Rn != RegisterAlias.Aarch32Pc;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluUx(inst, address, opCode);
+
+ public OpCodeT32AluUx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rotate = (opCode >> 4) & 0x3;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32BImm20.cs b/src/ARMeilleure/Decoders/OpCodeT32BImm20.cs
new file mode 100644
index 00000000..b6da8abd
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32BImm20.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32BImm20 : OpCodeT32, IOpCode32BImm
+ {
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32BImm20(inst, address, opCode);
+
+ public OpCodeT32BImm20(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ uint pc = GetPc();
+
+ int imm11 = (opCode >> 0) & 0x7ff;
+ int j2 = (opCode >> 11) & 1;
+ int j1 = (opCode >> 13) & 1;
+ int imm6 = (opCode >> 16) & 0x3f;
+ int s = (opCode >> 26) & 1;
+
+ int imm32 = imm11 | (imm6 << 11) | (j1 << 17) | (j2 << 18) | (s << 19);
+ imm32 = (imm32 << 13) >> 12;
+
+ Immediate = pc + imm32;
+
+ Cond = (Condition)((opCode >> 22) & 0xf);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeT32BImm24.cs b/src/ARMeilleure/Decoders/OpCodeT32BImm24.cs
new file mode 100644
index 00000000..774ec3a6
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32BImm24.cs
@@ -0,0 +1,35 @@
+using ARMeilleure.Instructions;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32BImm24 : OpCodeT32, IOpCode32BImm
+ {
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32BImm24(inst, address, opCode);
+
+ public OpCodeT32BImm24(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ uint pc = GetPc();
+
+ if (inst.Name == InstName.Blx)
+ {
+ pc &= ~3u;
+ }
+
+ int imm11 = (opCode >> 0) & 0x7ff;
+ int j2 = (opCode >> 11) & 1;
+ int j1 = (opCode >> 13) & 1;
+ int imm10 = (opCode >> 16) & 0x3ff;
+ int s = (opCode >> 26) & 1;
+
+ int i1 = j1 ^ s ^ 1;
+ int i2 = j2 ^ s ^ 1;
+
+ int imm32 = imm11 | (imm10 << 11) | (i2 << 21) | (i1 << 22) | (s << 23);
+ imm32 = (imm32 << 8) >> 7;
+
+ Immediate = pc + imm32;
+ }
+ }
+}
\ No newline at end of file
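Note: BL/BLX extend their range by deriving two extra offset bits from J1 and J2: I1 = NOT(J1 XOR S) and I2 = NOT(J2 XOR S), which is exactly what `j1 ^ s ^ 1` and `j2 ^ s ^ 1` compute; the final `(imm32 << 8) >> 7` then sign-extends from bit 23 and applies the implicit halfword scaling in one step. A hedged spot check with S=1, J1=1, J2=0 and zero imm10/imm11 fields:

    int s = 1, j1 = 1, j2 = 0;
    int i1 = j1 ^ s ^ 1;                              // NOT(J1 ^ S) = 1
    int i2 = j2 ^ s ^ 1;                              // NOT(J2 ^ S) = 0
    int imm32 = (i2 << 21) | (i1 << 22) | (s << 23);  // 0x00C00000
    imm32 = (imm32 << 8) >> 7;                        // -0x800000: 8 MiB backward branch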
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemImm12.cs b/src/ARMeilleure/Decoders/OpCodeT32MemImm12.cs
new file mode 100644
index 00000000..7838604b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemImm12.cs
@@ -0,0 +1,25 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemImm12 : OpCodeT32, IOpCode32Mem
+ {
+ public int Rt { get; }
+ public int Rn { get; }
+ public bool WBack => false;
+ public bool IsLoad { get; }
+ public bool Index => true;
+ public bool Add => true;
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemImm12(inst, address, opCode);
+
+ public OpCodeT32MemImm12(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ Immediate = opCode & 0xfff;
+
+ IsLoad = ((opCode >> 20) & 1) != 0;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemImm8.cs b/src/ARMeilleure/Decoders/OpCodeT32MemImm8.cs
new file mode 100644
index 00000000..d8b7763c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemImm8.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemImm8 : OpCodeT32, IOpCode32Mem
+ {
+ public int Rt { get; }
+ public int Rn { get; }
+ public bool WBack { get; }
+ public bool IsLoad { get; }
+ public bool Index { get; }
+ public bool Add { get; }
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemImm8(inst, address, opCode);
+
+ public OpCodeT32MemImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ Index = ((opCode >> 10) & 1) != 0;
+ Add = ((opCode >> 9) & 1) != 0;
+ WBack = ((opCode >> 8) & 1) != 0;
+
+ Immediate = opCode & 0xff;
+
+ IsLoad = ((opCode >> 20) & 1) != 0;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemImm8D.cs b/src/ARMeilleure/Decoders/OpCodeT32MemImm8D.cs
new file mode 100644
index 00000000..7a078c48
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemImm8D.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemImm8D : OpCodeT32, IOpCode32Mem
+ {
+ public int Rt { get; }
+ public int Rt2 { get; }
+ public int Rn { get; }
+ public bool WBack { get; }
+ public bool IsLoad { get; }
+ public bool Index { get; }
+ public bool Add { get; }
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemImm8D(inst, address, opCode);
+
+ public OpCodeT32MemImm8D(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt2 = (opCode >> 8) & 0xf;
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ Index = ((opCode >> 24) & 1) != 0;
+ Add = ((opCode >> 23) & 1) != 0;
+ WBack = ((opCode >> 21) & 1) != 0;
+
+ Immediate = (opCode & 0xff) << 2;
+
+ IsLoad = ((opCode >> 20) & 1) != 0;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemLdEx.cs b/src/ARMeilleure/Decoders/OpCodeT32MemLdEx.cs
new file mode 100644
index 00000000..c8eb36b3
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemLdEx.cs
@@ -0,0 +1,26 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemLdEx : OpCodeT32, IOpCode32MemEx
+ {
+ public int Rd => 0;
+ public int Rt { get; }
+ public int Rt2 { get; }
+ public int Rn { get; }
+
+ public bool WBack => false;
+ public bool IsLoad => true;
+ public bool Index => false;
+ public bool Add => false;
+
+ public int Immediate => 0;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemLdEx(inst, address, opCode);
+
+ public OpCodeT32MemLdEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt2 = (opCode >> 8) & 0xf;
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemMult.cs b/src/ARMeilleure/Decoders/OpCodeT32MemMult.cs
new file mode 100644
index 00000000..a9ba306d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemMult.cs
@@ -0,0 +1,52 @@
+using System.Numerics;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemMult : OpCodeT32, IOpCode32MemMult
+ {
+ public int Rn { get; }
+
+ public int RegisterMask { get; }
+ public int Offset { get; }
+ public int PostOffset { get; }
+
+ public bool IsLoad { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemMult(inst, address, opCode);
+
+ public OpCodeT32MemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rn = (opCode >> 16) & 0xf;
+
+ bool isLoad = (opCode & (1 << 20)) != 0;
+ bool w = (opCode & (1 << 21)) != 0;
+ bool u = (opCode & (1 << 23)) != 0;
+ bool p = (opCode & (1 << 24)) != 0;
+
+ RegisterMask = opCode & 0xffff;
+
+ int regsSize = BitOperations.PopCount((uint)RegisterMask) * 4;
+
+ if (!u)
+ {
+ Offset -= regsSize;
+ }
+
+ if (u == p)
+ {
+ Offset += 4;
+ }
+
+ if (w)
+ {
+ PostOffset = u ? regsSize : -regsSize;
+ }
+ else
+ {
+ PostOffset = 0;
+ }
+
+ IsLoad = isLoad;
+ }
+ }
+}
\ No newline at end of file
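Note: the P/U/W handling above folds the LDM/STM addressing variants into a pre-access Offset plus a post-access PostOffset: a decrement (u = false) starts the window regsSize below Rn, and the `u == p` case nudges it by one word to separate the "before"/"after" forms. A hedged table of the results for a four-register list (regsSize = 16); only the IA and DB rows are actually encodable in T32, the others are shown for completeness:

    static (int Offset, int PostOffset) Decode(bool u, bool p, bool w, int regsSize)
    {
        int offset = 0;
        if (!u) offset -= regsSize;          // window lies below Rn
        if (u == p) offset += 4;             // shift by one word for IB/DA
        int post = w ? (u ? regsSize : -regsSize) : 0; // writeback moves Rn past the window
        return (offset, post);
    }
    // u=1, p=0, w=1 -> (  0,  16)   IA
    // u=1, p=1, w=1 -> (  4,  16)   IB (A32 only)
    // u=0, p=0, w=1 -> (-12, -16)   DA (A32 only)
    // u=0, p=1, w=1 -> (-16, -16)   DB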
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemRsImm.cs b/src/ARMeilleure/Decoders/OpCodeT32MemRsImm.cs
new file mode 100644
index 00000000..056d3b46
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemRsImm.cs
@@ -0,0 +1,30 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemRsImm : OpCodeT32, IOpCode32MemRsImm
+ {
+ public int Rt { get; }
+ public int Rn { get; }
+ public int Rm { get; }
+ public ShiftType ShiftType => ShiftType.Lsl;
+
+ public bool WBack => false;
+ public bool IsLoad { get; }
+ public bool Index => true;
+ public bool Add => true;
+
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemRsImm(inst, address, opCode);
+
+ public OpCodeT32MemRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ IsLoad = (opCode & (1 << 20)) != 0;
+
+ Immediate = (opCode >> 4) & 3;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemStEx.cs b/src/ARMeilleure/Decoders/OpCodeT32MemStEx.cs
new file mode 100644
index 00000000..6a0a6bb1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemStEx.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemStEx : OpCodeT32, IOpCode32MemEx
+ {
+ public int Rd { get; }
+ public int Rt { get; }
+ public int Rt2 { get; }
+ public int Rn { get; }
+
+ public bool WBack => false;
+ public bool IsLoad => false;
+ public bool Index => false;
+ public bool Add => false;
+
+ public int Immediate => 0;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemStEx(inst, address, opCode);
+
+ public OpCodeT32MemStEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0xf;
+ Rt2 = (opCode >> 8) & 0xf;
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MovImm16.cs b/src/ARMeilleure/Decoders/OpCodeT32MovImm16.cs
new file mode 100644
index 00000000..5161892b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MovImm16.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MovImm16 : OpCodeT32Alu, IOpCode32AluImm16
+ {
+ public int Immediate { get; }
+
+ public bool IsRotated => false;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MovImm16(inst, address, opCode);
+
+ public OpCodeT32MovImm16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Immediate = (opCode & 0xff) | ((opCode >> 4) & 0x700) | ((opCode >> 15) & 0x800) | ((opCode >> 4) & 0xf000);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeT32ShiftReg.cs b/src/ARMeilleure/Decoders/OpCodeT32ShiftReg.cs
new file mode 100644
index 00000000..36055975
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32ShiftReg.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32ShiftReg : OpCodeT32Alu, IOpCode32AluRsReg
+ {
+ public int Rm => Rn;
+ public int Rs { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32ShiftReg(inst, address, opCode);
+
+ public OpCodeT32ShiftReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rs = (opCode >> 0) & 0xf;
+
+ ShiftType = (ShiftType)((opCode >> 21) & 3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32Tb.cs b/src/ARMeilleure/Decoders/OpCodeT32Tb.cs
new file mode 100644
index 00000000..527754b1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32Tb.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32Tb : OpCodeT32, IOpCode32BReg
+ {
+ public int Rm { get; }
+ public int Rn { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32Tb(inst, address, opCode);
+
+ public OpCodeT32Tb(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCodeTable.cs b/src/ARMeilleure/Decoders/OpCodeTable.cs
new file mode 100644
index 00000000..4f359958
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeTable.cs
@@ -0,0 +1,1509 @@
+using ARMeilleure.Instructions;
+using System;
+using System.Collections.Generic;
+using System.Numerics;
+
+namespace ARMeilleure.Decoders
+{
+ static class OpCodeTable
+ {
+ public delegate OpCode MakeOp(InstDescriptor inst, ulong address, int opCode);
+
+ private const int FastLookupSize = 0x1000;
+
+ private readonly struct InstInfo
+ {
+ public int Mask { get; }
+ public int Value { get; }
+
+ public InstDescriptor Inst { get; }
+
+ public MakeOp MakeOp { get; }
+
+ public InstInfo(int mask, int value, InstDescriptor inst, MakeOp makeOp)
+ {
+ Mask = mask;
+ Value = value;
+ Inst = inst;
+ MakeOp = makeOp;
+ }
+ }
+
+ private static List<InstInfo> AllInstA32 = new();
+ private static List<InstInfo> AllInstT32 = new();
+ private static List<InstInfo> AllInstA64 = new();
+
+ private static InstInfo[][] InstA32FastLookup = new InstInfo[FastLookupSize][];
+ private static InstInfo[][] InstT32FastLookup = new InstInfo[FastLookupSize][];
+ private static InstInfo[][] InstA64FastLookup = new InstInfo[FastLookupSize][];
+
+ static OpCodeTable()
+ {
+#region "OpCode Table (AArch64)"
+ // Base
+ SetA64("x0011010000xxxxx000000xxxxxxxxxx", InstName.Adc, InstEmit.Adc, OpCodeAluRs.Create);
+ SetA64("x0111010000xxxxx000000xxxxxxxxxx", InstName.Adcs, InstEmit.Adcs, OpCodeAluRs.Create);
+ SetA64("x00100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluImm.Create);
+ SetA64("00001011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluRs.Create);
+ SetA64("10001011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluRs.Create);
+ SetA64("x0001011001xxxxxxxx0xxxxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluRx.Create);
+ SetA64("x0001011001xxxxxxxx100xxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluRx.Create);
+ SetA64("x01100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluImm.Create);
+ SetA64("00101011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluRs.Create);
+ SetA64("10101011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluRs.Create);
+ SetA64("x0101011001xxxxxxxx0xxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluRx.Create);
+ SetA64("x0101011001xxxxxxxx100xxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluRx.Create);
+ SetA64("0xx10000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Adr, InstEmit.Adr, OpCodeAdr.Create);
+ SetA64("1xx10000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Adrp, InstEmit.Adrp, OpCodeAdr.Create);
+ SetA64("0001001000xxxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, OpCodeAluImm.Create);
+ SetA64("100100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, OpCodeAluImm.Create);
+ SetA64("00001010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.And, InstEmit.And, OpCodeAluRs.Create);
+ SetA64("10001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, OpCodeAluRs.Create);
+ SetA64("0111001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, OpCodeAluImm.Create);
+ SetA64("111100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, OpCodeAluImm.Create);
+ SetA64("01101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, OpCodeAluRs.Create);
+ SetA64("11101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, OpCodeAluRs.Create);
+ SetA64("x0011010110xxxxx001010xxxxxxxxxx", InstName.Asrv, InstEmit.Asrv, OpCodeAluRs.Create);
+ SetA64("000101xxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit.B, OpCodeBImmAl.Create);
+ SetA64("01010100xxxxxxxxxxxxxxxxxxx0xxxx", InstName.B_Cond, InstEmit.B_Cond, OpCodeBImmCond.Create);
+ SetA64("00110011000xxxxx0xxxxxxxxxxxxxxx", InstName.Bfm, InstEmit.Bfm, OpCodeBfm.Create);
+ SetA64("1011001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Bfm, InstEmit.Bfm, OpCodeBfm.Create);
+ SetA64("00001010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Bic, InstEmit.Bic, OpCodeAluRs.Create);
+ SetA64("10001010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Bic, InstEmit.Bic, OpCodeAluRs.Create);
+ SetA64("01101010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Bics, InstEmit.Bics, OpCodeAluRs.Create);
+ SetA64("11101010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Bics, InstEmit.Bics, OpCodeAluRs.Create);
+ SetA64("100101xxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit.Bl, OpCodeBImmAl.Create);
+ SetA64("1101011000111111000000xxxxx00000", InstName.Blr, InstEmit.Blr, OpCodeBReg.Create);
+ SetA64("1101011000011111000000xxxxx00000", InstName.Br, InstEmit.Br, OpCodeBReg.Create);
+ SetA64("11010100001xxxxxxxxxxxxxxxx00000", InstName.Brk, InstEmit.Brk, OpCodeException.Create);
+ SetA64("x0110101xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cbnz, InstEmit.Cbnz, OpCodeBImmCmp.Create);
+ SetA64("x0110100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cbz, InstEmit.Cbz, OpCodeBImmCmp.Create);
+ SetA64("x0111010010xxxxxxxxx10xxxxx0xxxx", InstName.Ccmn, InstEmit.Ccmn, OpCodeCcmpImm.Create);
+ SetA64("x0111010010xxxxxxxxx00xxxxx0xxxx", InstName.Ccmn, InstEmit.Ccmn, OpCodeCcmpReg.Create);
+ SetA64("x1111010010xxxxxxxxx10xxxxx0xxxx", InstName.Ccmp, InstEmit.Ccmp, OpCodeCcmpImm.Create);
+ SetA64("x1111010010xxxxxxxxx00xxxxx0xxxx", InstName.Ccmp, InstEmit.Ccmp, OpCodeCcmpReg.Create);
+ SetA64("11010101000000110011xxxx01011111", InstName.Clrex, InstEmit.Clrex, OpCodeSystem.Create);
+ SetA64("x101101011000000000101xxxxxxxxxx", InstName.Cls, InstEmit.Cls, OpCodeAlu.Create);
+ SetA64("x101101011000000000100xxxxxxxxxx", InstName.Clz, InstEmit.Clz, OpCodeAlu.Create);
+ SetA64("00011010110xxxxx010000xxxxxxxxxx", InstName.Crc32b, InstEmit.Crc32b, OpCodeAluBinary.Create);
+ SetA64("00011010110xxxxx010001xxxxxxxxxx", InstName.Crc32h, InstEmit.Crc32h, OpCodeAluBinary.Create);
+ SetA64("00011010110xxxxx010010xxxxxxxxxx", InstName.Crc32w, InstEmit.Crc32w, OpCodeAluBinary.Create);
+ SetA64("10011010110xxxxx010011xxxxxxxxxx", InstName.Crc32x, InstEmit.Crc32x, OpCodeAluBinary.Create);
+ SetA64("00011010110xxxxx010100xxxxxxxxxx", InstName.Crc32cb, InstEmit.Crc32cb, OpCodeAluBinary.Create);
+ SetA64("00011010110xxxxx010101xxxxxxxxxx", InstName.Crc32ch, InstEmit.Crc32ch, OpCodeAluBinary.Create);
+ SetA64("00011010110xxxxx010110xxxxxxxxxx", InstName.Crc32cw, InstEmit.Crc32cw, OpCodeAluBinary.Create);
+ SetA64("10011010110xxxxx010111xxxxxxxxxx", InstName.Crc32cx, InstEmit.Crc32cx, OpCodeAluBinary.Create);
+ SetA64("11010101000000110010001010011111", InstName.Csdb, InstEmit.Csdb, OpCodeSystem.Create);
+ SetA64("x0011010100xxxxxxxxx00xxxxxxxxxx", InstName.Csel, InstEmit.Csel, OpCodeCsel.Create);
+ SetA64("x0011010100xxxxxxxxx01xxxxxxxxxx", InstName.Csinc, InstEmit.Csinc, OpCodeCsel.Create);
+ SetA64("x1011010100xxxxxxxxx00xxxxxxxxxx", InstName.Csinv, InstEmit.Csinv, OpCodeCsel.Create);
+ SetA64("x1011010100xxxxxxxxx01xxxxxxxxxx", InstName.Csneg, InstEmit.Csneg, OpCodeCsel.Create);
+ SetA64("11010101000000110011xxxx10111111", InstName.Dmb, InstEmit.Dmb, OpCodeSystem.Create);
+ SetA64("11010101000000110011xxxx10011111", InstName.Dsb, InstEmit.Dsb, OpCodeSystem.Create);
+ SetA64("01001010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Eon, InstEmit.Eon, OpCodeAluRs.Create);
+ SetA64("11001010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Eon, InstEmit.Eon, OpCodeAluRs.Create);
+ SetA64("0101001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluImm.Create);
+ SetA64("110100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluImm.Create);
+ SetA64("01001010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluRs.Create);
+ SetA64("11001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluRs.Create);
+ SetA64("00010011100xxxxx0xxxxxxxxxxxxxxx", InstName.Extr, InstEmit.Extr, OpCodeAluRs.Create);
+ SetA64("10010011110xxxxxxxxxxxxxxxxxxxxx", InstName.Extr, InstEmit.Extr, OpCodeAluRs.Create);
+ SetA64("11010101000000110010000011011111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("11010101000000110010000011111111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("110101010000001100100001xxx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("1101010100000011001000100xx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("1101010100000011001000101>>11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("110101010000001100100011xxx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("11010101000000110010>>xxxxx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("11010101000000110011xxxx11011111", InstName.Isb, InstEmit.Isb, OpCodeSystem.Create);
+ SetA64("xx001000110xxxxx1xxxxxxxxxxxxxxx", InstName.Ldar, InstEmit.Ldar, OpCodeMemEx.Create);
+ SetA64("1x001000011xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxp, InstEmit.Ldaxp, OpCodeMemEx.Create);
+ SetA64("xx001000010xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxr, InstEmit.Ldaxr, OpCodeMemEx.Create);
+ SetA64("<<10100xx1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldp, InstEmit.Ldp, OpCodeMemPair.Create);
+ SetA64("xx111000010xxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeMemImm.Create);
+ SetA64("xx11100101xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeMemImm.Create);
+ SetA64("xx111000011xxxxxxxxx10xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeMemReg.Create);
+ SetA64("xx011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal, InstEmit.Ldr_Literal, OpCodeMemLit.Create);
+ SetA64("0x1110001x0xxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemImm.Create);
+ SetA64("0x1110011xxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemImm.Create);
+ SetA64("10111000100xxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemImm.Create);
+ SetA64("1011100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemImm.Create);
+ SetA64("0x1110001x1xxxxxxxxx10xxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemReg.Create);
+ SetA64("10111000101xxxxxxxxx10xxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemReg.Create);
+ SetA64("xx001000010xxxxx0xxxxxxxxxxxxxxx", InstName.Ldxr, InstEmit.Ldxr, OpCodeMemEx.Create);
+ SetA64("1x001000011xxxxx0xxxxxxxxxxxxxxx", InstName.Ldxp, InstEmit.Ldxp, OpCodeMemEx.Create);
+ SetA64("x0011010110xxxxx001000xxxxxxxxxx", InstName.Lslv, InstEmit.Lslv, OpCodeAluRs.Create);
+ SetA64("x0011010110xxxxx001001xxxxxxxxxx", InstName.Lsrv, InstEmit.Lsrv, OpCodeAluRs.Create);
+ SetA64("x0011011000xxxxx0xxxxxxxxxxxxxxx", InstName.Madd, InstEmit.Madd, OpCodeMul.Create);
+ SetA64("0111001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movk, InstEmit.Movk, OpCodeMov.Create);
+ SetA64("111100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movk, InstEmit.Movk, OpCodeMov.Create);
+ SetA64("0001001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movn, InstEmit.Movn, OpCodeMov.Create);
+ SetA64("100100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movn, InstEmit.Movn, OpCodeMov.Create);
+ SetA64("0101001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movz, InstEmit.Movz, OpCodeMov.Create);
+ SetA64("110100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movz, InstEmit.Movz, OpCodeMov.Create);
+ SetA64("110101010011xxxxxxxxxxxxxxxxxxxx", InstName.Mrs, InstEmit.Mrs, OpCodeSystem.Create);
+ SetA64("110101010001xxxxxxxxxxxxxxxxxxxx", InstName.Msr, InstEmit.Msr, OpCodeSystem.Create);
+ SetA64("x0011011000xxxxx1xxxxxxxxxxxxxxx", InstName.Msub, InstEmit.Msub, OpCodeMul.Create);
+ SetA64("11010101000000110010000000011111", InstName.Nop, InstEmit.Nop, OpCodeSystem.Create);
+ SetA64("00101010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Orn, InstEmit.Orn, OpCodeAluRs.Create);
+ SetA64("10101010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Orn, InstEmit.Orn, OpCodeAluRs.Create);
+ SetA64("0011001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluImm.Create);
+ SetA64("101100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluImm.Create);
+ SetA64("00101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluRs.Create);
+ SetA64("10101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluRs.Create);
+ SetA64("1111100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemImm.Create); // immediate
+ SetA64("11111000100xxxxxxxxx00xxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemImm.Create); // prfum (unscaled offset)
+ SetA64("11011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemLit.Create); // literal
+ SetA64("11111000101xxxxxxxxx10xxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemReg.Create); // register
+ SetA64("x101101011000000000000xxxxxxxxxx", InstName.Rbit, InstEmit.Rbit, OpCodeAlu.Create);
+ SetA64("1101011001011111000000xxxxx00000", InstName.Ret, InstEmit.Ret, OpCodeBReg.Create);
+ SetA64("x101101011000000000001xxxxxxxxxx", InstName.Rev16, InstEmit.Rev16, OpCodeAlu.Create);
+ SetA64("x101101011000000000010xxxxxxxxxx", InstName.Rev32, InstEmit.Rev32, OpCodeAlu.Create);
+ SetA64("1101101011000000000011xxxxxxxxxx", InstName.Rev64, InstEmit.Rev64, OpCodeAlu.Create);
+ SetA64("x0011010110xxxxx001011xxxxxxxxxx", InstName.Rorv, InstEmit.Rorv, OpCodeAluRs.Create);
+ SetA64("x1011010000xxxxx000000xxxxxxxxxx", InstName.Sbc, InstEmit.Sbc, OpCodeAluRs.Create);
+ SetA64("x1111010000xxxxx000000xxxxxxxxxx", InstName.Sbcs, InstEmit.Sbcs, OpCodeAluRs.Create);
+ SetA64("00010011000xxxxx0xxxxxxxxxxxxxxx", InstName.Sbfm, InstEmit.Sbfm, OpCodeBfm.Create);
+ SetA64("1001001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Sbfm, InstEmit.Sbfm, OpCodeBfm.Create);
+ SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv, InstEmit.Sdiv, OpCodeAluBinary.Create);
+ SetA64("11010101000000110010000010011111", InstName.Sev, InstEmit.Nop, OpCodeSystem.Create);
+ SetA64("11010101000000110010000010111111", InstName.Sevl, InstEmit.Nop, OpCodeSystem.Create);
+ SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl, InstEmit.Smaddl, OpCodeMul.Create);
+ SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl, InstEmit.Smsubl, OpCodeMul.Create);
+ SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smulh, InstEmit.Smulh, OpCodeMul.Create);
+ SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr, InstEmit.Stlr, OpCodeMemEx.Create);
+ SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp, InstEmit.Stlxp, OpCodeMemEx.Create);
+ SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr, InstEmit.Stlxr, OpCodeMemEx.Create);
+ SetA64("x010100xx0xxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, InstEmit.Stp, OpCodeMemPair.Create);
+ SetA64("xx111000000xxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeMemImm.Create);
+ SetA64("xx11100100xxxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeMemImm.Create);
+ SetA64("xx111000001xxxxxxxxx10xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeMemReg.Create);
+ SetA64("1x001000001xxxxx0xxxxxxxxxxxxxxx", InstName.Stxp, InstEmit.Stxp, OpCodeMemEx.Create);
+ SetA64("xx001000000xxxxx0xxxxxxxxxxxxxxx", InstName.Stxr, InstEmit.Stxr, OpCodeMemEx.Create);
+ SetA64("x10100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluImm.Create);
+ SetA64("01001011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluRs.Create);
+ SetA64("11001011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluRs.Create);
+ SetA64("x1001011001xxxxxxxx0xxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluRx.Create);
+ SetA64("x1001011001xxxxxxxx100xxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluRx.Create);
+ SetA64("x11100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluImm.Create);
+ SetA64("01101011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluRs.Create);
+ SetA64("11101011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluRs.Create);
+ SetA64("x1101011001xxxxxxxx0xxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluRx.Create);
+ SetA64("x1101011001xxxxxxxx100xxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluRx.Create);
+ SetA64("11010100000xxxxxxxxxxxxxxxx00001", InstName.Svc, InstEmit.Svc, OpCodeException.Create);
+ SetA64("1101010100001xxxxxxxxxxxxxxxxxxx", InstName.Sys, InstEmit.Sys, OpCodeSystem.Create);
+ SetA64("x0110111xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tbnz, InstEmit.Tbnz, OpCodeBImmTest.Create);
+ SetA64("x0110110xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tbz, InstEmit.Tbz, OpCodeBImmTest.Create);
+ SetA64("01010011000xxxxx0xxxxxxxxxxxxxxx", InstName.Ubfm, InstEmit.Ubfm, OpCodeBfm.Create);
+ SetA64("1101001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Ubfm, InstEmit.Ubfm, OpCodeBfm.Create);
+ SetA64("x0011010110xxxxx000010xxxxxxxxxx", InstName.Udiv, InstEmit.Udiv, OpCodeAluBinary.Create);
+ SetA64("10011011101xxxxx0xxxxxxxxxxxxxxx", InstName.Umaddl, InstEmit.Umaddl, OpCodeMul.Create);
+ SetA64("10011011101xxxxx1xxxxxxxxxxxxxxx", InstName.Umsubl, InstEmit.Umsubl, OpCodeMul.Create);
+ SetA64("10011011110xxxxx0xxxxxxxxxxxxxxx", InstName.Umulh, InstEmit.Umulh, OpCodeMul.Create);
+ SetA64("11010101000000110010000001011111", InstName.Wfe, InstEmit.Nop, OpCodeSystem.Create);
+ SetA64("11010101000000110010000001111111", InstName.Wfi, InstEmit.Nop, OpCodeSystem.Create);
+ SetA64("11010101000000110010000000111111", InstName.Yield, InstEmit.Nop, OpCodeSystem.Create);
+
+ // FP & SIMD
+ SetA64("0101111011100000101110xxxxxxxxxx", InstName.Abs_S, InstEmit.Abs_S, OpCodeSimd.Create);
+ SetA64("0>001110<<100000101110xxxxxxxxxx", InstName.Abs_V, InstEmit.Abs_V, OpCodeSimd.Create);
+ SetA64("01011110111xxxxx100001xxxxxxxxxx", InstName.Add_S, InstEmit.Add_S, OpCodeSimdReg.Create);
+ SetA64("0>001110<<1xxxxx100001xxxxxxxxxx", InstName.Add_V, InstEmit.Add_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx010000xxxxxxxxxx", InstName.Addhn_V, InstEmit.Addhn_V, OpCodeSimdReg.Create);
+ SetA64("0101111011110001101110xxxxxxxxxx", InstName.Addp_S, InstEmit.Addp_S, OpCodeSimd.Create);
+ SetA64("0>001110<<1xxxxx101111xxxxxxxxxx", InstName.Addp_V, InstEmit.Addp_V, OpCodeSimdReg.Create);
+ SetA64("000011100x110001101110xxxxxxxxxx", InstName.Addv_V, InstEmit.Addv_V, OpCodeSimd.Create);
+ SetA64("01001110<<110001101110xxxxxxxxxx", InstName.Addv_V, InstEmit.Addv_V, OpCodeSimd.Create);
+ SetA64("0100111000101000010110xxxxxxxxxx", InstName.Aesd_V, InstEmit.Aesd_V, OpCodeSimd.Create);
+ SetA64("0100111000101000010010xxxxxxxxxx", InstName.Aese_V, InstEmit.Aese_V, OpCodeSimd.Create);
+ SetA64("0100111000101000011110xxxxxxxxxx", InstName.Aesimc_V, InstEmit.Aesimc_V, OpCodeSimd.Create);
+ SetA64("0100111000101000011010xxxxxxxxxx", InstName.Aesmc_V, InstEmit.Aesmc_V, OpCodeSimd.Create);
+ SetA64("0x001110001xxxxx000111xxxxxxxxxx", InstName.And_V, InstEmit.And_V, OpCodeSimdReg.Create);
+ SetA64("0x001110011xxxxx000111xxxxxxxxxx", InstName.Bic_V, InstEmit.Bic_V, OpCodeSimdReg.Create);
+ SetA64("0x10111100000xxx0xx101xxxxxxxxxx", InstName.Bic_Vi, InstEmit.Bic_Vi, OpCodeSimdImm.Create);
+ SetA64("0x10111100000xxx10x101xxxxxxxxxx", InstName.Bic_Vi, InstEmit.Bic_Vi, OpCodeSimdImm.Create);
+ SetA64("0x101110111xxxxx000111xxxxxxxxxx", InstName.Bif_V, InstEmit.Bif_V, OpCodeSimdReg.Create);
+ SetA64("0x101110101xxxxx000111xxxxxxxxxx", InstName.Bit_V, InstEmit.Bit_V, OpCodeSimdReg.Create);
+ SetA64("0x101110011xxxxx000111xxxxxxxxxx", InstName.Bsl_V, InstEmit.Bsl_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<100000010010xxxxxxxxxx", InstName.Cls_V, InstEmit.Cls_V, OpCodeSimd.Create);
+ SetA64("0x101110<<100000010010xxxxxxxxxx", InstName.Clz_V, InstEmit.Clz_V, OpCodeSimd.Create);
+ SetA64("01111110111xxxxx100011xxxxxxxxxx", InstName.Cmeq_S, InstEmit.Cmeq_S, OpCodeSimdReg.Create);
+ SetA64("0101111011100000100110xxxxxxxxxx", InstName.Cmeq_S, InstEmit.Cmeq_S, OpCodeSimd.Create);
+ SetA64("0>101110<<1xxxxx100011xxxxxxxxxx", InstName.Cmeq_V, InstEmit.Cmeq_V, OpCodeSimdReg.Create);
+ SetA64("0>001110<<100000100110xxxxxxxxxx", InstName.Cmeq_V, InstEmit.Cmeq_V, OpCodeSimd.Create);
+ SetA64("01011110111xxxxx001111xxxxxxxxxx", InstName.Cmge_S, InstEmit.Cmge_S, OpCodeSimdReg.Create);
+ SetA64("0111111011100000100010xxxxxxxxxx", InstName.Cmge_S, InstEmit.Cmge_S, OpCodeSimd.Create);
+ SetA64("0>001110<<1xxxxx001111xxxxxxxxxx", InstName.Cmge_V, InstEmit.Cmge_V, OpCodeSimdReg.Create);
+ SetA64("0>101110<<100000100010xxxxxxxxxx", InstName.Cmge_V, InstEmit.Cmge_V, OpCodeSimd.Create);
+ SetA64("01011110111xxxxx001101xxxxxxxxxx", InstName.Cmgt_S, InstEmit.Cmgt_S, OpCodeSimdReg.Create);
+ SetA64("0101111011100000100010xxxxxxxxxx", InstName.Cmgt_S, InstEmit.Cmgt_S, OpCodeSimd.Create);
+ SetA64("0>001110<<1xxxxx001101xxxxxxxxxx", InstName.Cmgt_V, InstEmit.Cmgt_V, OpCodeSimdReg.Create);
+ SetA64("0>001110<<100000100010xxxxxxxxxx", InstName.Cmgt_V, InstEmit.Cmgt_V, OpCodeSimd.Create);
+ SetA64("01111110111xxxxx001101xxxxxxxxxx", InstName.Cmhi_S, InstEmit.Cmhi_S, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx001101xxxxxxxxxx", InstName.Cmhi_V, InstEmit.Cmhi_V, OpCodeSimdReg.Create);
+ SetA64("01111110111xxxxx001111xxxxxxxxxx", InstName.Cmhs_S, InstEmit.Cmhs_S, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx001111xxxxxxxxxx", InstName.Cmhs_V, InstEmit.Cmhs_V, OpCodeSimdReg.Create);
+ SetA64("0111111011100000100110xxxxxxxxxx", InstName.Cmle_S, InstEmit.Cmle_S, OpCodeSimd.Create);
+ SetA64("0>101110<<100000100110xxxxxxxxxx", InstName.Cmle_V, InstEmit.Cmle_V, OpCodeSimd.Create);
+ SetA64("0101111011100000101010xxxxxxxxxx", InstName.Cmlt_S, InstEmit.Cmlt_S, OpCodeSimd.Create);
+ SetA64("0>001110<<100000101010xxxxxxxxxx", InstName.Cmlt_V, InstEmit.Cmlt_V, OpCodeSimd.Create);
+ SetA64("01011110111xxxxx100011xxxxxxxxxx", InstName.Cmtst_S, InstEmit.Cmtst_S, OpCodeSimdReg.Create);
+ SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstName.Cmtst_V, InstEmit.Cmtst_V, OpCodeSimdReg.Create);
+ SetA64("0x00111000100000010110xxxxxxxxxx", InstName.Cnt_V, InstEmit.Cnt_V, OpCodeSimd.Create);
+ SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp, InstEmit.Dup_Gp, OpCodeSimdIns.Create);
+ SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S, InstEmit.Dup_S, OpCodeSimdIns.Create);
+ SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstName.Dup_V, InstEmit.Dup_V, OpCodeSimdIns.Create);
+ SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstName.Eor_V, InstEmit.Eor_V, OpCodeSimdReg.Create);
+ SetA64("0>101110000xxxxx0<xxx0xxxxxxxxxx", InstName.Ext_V, InstEmit.Ext_V, OpCodeSimdExt.Create);
+ SetA64("011111101x1xxxxx110101xxxxxxxxxx", InstName.Fabd_S, InstEmit.Fabd_S, OpCodeSimdReg.Create);
+ SetA64("0>1011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V, InstEmit.Fabd_V, OpCodeSimdReg.Create);
+ SetA64("000111100x100000110000xxxxxxxxxx", InstName.Fabs_S, InstEmit.Fabs_S, OpCodeSimd.Create);
+ SetA64("0>0011101<100000111110xxxxxxxxxx", InstName.Fabs_V, InstEmit.Fabs_V, OpCodeSimd.Create);
+ SetA64("011111100x1xxxxx111011xxxxxxxxxx", InstName.Facge_S, InstEmit.Facge_S, OpCodeSimdReg.Create);
+ SetA64("0>1011100<1xxxxx111011xxxxxxxxxx", InstName.Facge_V, InstEmit.Facge_V, OpCodeSimdReg.Create);
+ SetA64("011111101x1xxxxx111011xxxxxxxxxx", InstName.Facgt_S, InstEmit.Facgt_S, OpCodeSimdReg.Create);
+ SetA64("0>1011101<1xxxxx111011xxxxxxxxxx", InstName.Facgt_V, InstEmit.Facgt_V, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstName.Fadd_S, InstEmit.Fadd_S, OpCodeSimdReg.Create);
+ SetA64("0>0011100<1xxxxx110101xxxxxxxxxx", InstName.Fadd_V, InstEmit.Fadd_V, OpCodeSimdReg.Create);
+ SetA64("011111100x110000110110xxxxxxxxxx", InstName.Faddp_S, InstEmit.Faddp_S, OpCodeSimd.Create);
+ SetA64("0>1011100<1xxxxx110101xxxxxxxxxx", InstName.Faddp_V, InstEmit.Faddp_V, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxxxxxx01xxxxx0xxxx", InstName.Fccmp_S, InstEmit.Fccmp_S, OpCodeSimdFcond.Create);
+ SetA64("000111100x1xxxxxxxxx01xxxxx1xxxx", InstName.Fccmpe_S, InstEmit.Fccmpe_S, OpCodeSimdFcond.Create);
+ SetA64("010111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_S, InstEmit.Fcmeq_S, OpCodeSimdReg.Create);
+ SetA64("010111101x100000110110xxxxxxxxxx", InstName.Fcmeq_S, InstEmit.Fcmeq_S, OpCodeSimd.Create);
+ SetA64("0>0011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_V, InstEmit.Fcmeq_V, OpCodeSimdReg.Create);
+ SetA64("0>0011101<100000110110xxxxxxxxxx", InstName.Fcmeq_V, InstEmit.Fcmeq_V, OpCodeSimd.Create);
+ SetA64("011111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmge_S, InstEmit.Fcmge_S, OpCodeSimdReg.Create);
+ SetA64("011111101x100000110010xxxxxxxxxx", InstName.Fcmge_S, InstEmit.Fcmge_S, OpCodeSimd.Create);
+ SetA64("0>1011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmge_V, InstEmit.Fcmge_V, OpCodeSimdReg.Create);
+ SetA64("0>1011101<100000110010xxxxxxxxxx", InstName.Fcmge_V, InstEmit.Fcmge_V, OpCodeSimd.Create);
+ SetA64("011111101x1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_S, InstEmit.Fcmgt_S, OpCodeSimdReg.Create);
+ SetA64("010111101x100000110010xxxxxxxxxx", InstName.Fcmgt_S, InstEmit.Fcmgt_S, OpCodeSimd.Create);
+ SetA64("0>1011101<1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_V, InstEmit.Fcmgt_V, OpCodeSimdReg.Create);
+ SetA64("0>0011101<100000110010xxxxxxxxxx", InstName.Fcmgt_V, InstEmit.Fcmgt_V, OpCodeSimd.Create);
+ SetA64("011111101x100000110110xxxxxxxxxx", InstName.Fcmle_S, InstEmit.Fcmle_S, OpCodeSimd.Create);
+ SetA64("0>1011101<100000110110xxxxxxxxxx", InstName.Fcmle_V, InstEmit.Fcmle_V, OpCodeSimd.Create);
+ SetA64("010111101x100000111010xxxxxxxxxx", InstName.Fcmlt_S, InstEmit.Fcmlt_S, OpCodeSimd.Create);
+ SetA64("0>0011101<100000111010xxxxxxxxxx", InstName.Fcmlt_V, InstEmit.Fcmlt_V, OpCodeSimd.Create);
+ SetA64("000111100x1xxxxx001000xxxxx0x000", InstName.Fcmp_S, InstEmit.Fcmp_S, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx001000xxxxx1x000", InstName.Fcmpe_S, InstEmit.Fcmpe_S, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxxxxxx11xxxxxxxxxx", InstName.Fcsel_S, InstEmit.Fcsel_S, OpCodeSimdFcond.Create);
+ SetA64("00011110xx10001xx10000xxxxxxxxxx", InstName.Fcvt_S, InstEmit.Fcvt_S, OpCodeSimd.Create);
+ SetA64("x00111100x100100000000xxxxxxxxxx", InstName.Fcvtas_Gp, InstEmit.Fcvtas_Gp, OpCodeSimdCvt.Create);
+ SetA64("010111100x100001110010xxxxxxxxxx", InstName.Fcvtas_S, InstEmit.Fcvtas_S, OpCodeSimd.Create);
+ SetA64("0>0011100<100001110010xxxxxxxxxx", InstName.Fcvtas_V, InstEmit.Fcvtas_V, OpCodeSimd.Create);
+ SetA64("x00111100x100101000000xxxxxxxxxx", InstName.Fcvtau_Gp, InstEmit.Fcvtau_Gp, OpCodeSimdCvt.Create);
+ SetA64("011111100x100001110010xxxxxxxxxx", InstName.Fcvtau_S, InstEmit.Fcvtau_S, OpCodeSimd.Create);
+ SetA64("0>1011100<100001110010xxxxxxxxxx", InstName.Fcvtau_V, InstEmit.Fcvtau_V, OpCodeSimd.Create);
+ SetA64("0x0011100x100001011110xxxxxxxxxx", InstName.Fcvtl_V, InstEmit.Fcvtl_V, OpCodeSimd.Create);
+ SetA64("x00111100x110000000000xxxxxxxxxx", InstName.Fcvtms_Gp, InstEmit.Fcvtms_Gp, OpCodeSimdCvt.Create);
+ SetA64("0>0011100<100001101110xxxxxxxxxx", InstName.Fcvtms_V, InstEmit.Fcvtms_V, OpCodeSimd.Create);
+ SetA64("x00111100x110001000000xxxxxxxxxx", InstName.Fcvtmu_Gp, InstEmit.Fcvtmu_Gp, OpCodeSimdCvt.Create);
+ SetA64("0x0011100x100001011010xxxxxxxxxx", InstName.Fcvtn_V, InstEmit.Fcvtn_V, OpCodeSimd.Create);
+ SetA64("x00111100x100000000000xxxxxxxxxx", InstName.Fcvtns_Gp, InstEmit.Fcvtns_Gp, OpCodeSimdCvt.Create);
+ SetA64("010111100x100001101010xxxxxxxxxx", InstName.Fcvtns_S, InstEmit.Fcvtns_S, OpCodeSimd.Create);
+ SetA64("0>0011100<100001101010xxxxxxxxxx", InstName.Fcvtns_V, InstEmit.Fcvtns_V, OpCodeSimd.Create);
+ SetA64("011111100x100001101010xxxxxxxxxx", InstName.Fcvtnu_S, InstEmit.Fcvtnu_S, OpCodeSimd.Create);
+ SetA64("0>1011100<100001101010xxxxxxxxxx", InstName.Fcvtnu_V, InstEmit.Fcvtnu_V, OpCodeSimd.Create);
+ SetA64("x00111100x101000000000xxxxxxxxxx", InstName.Fcvtps_Gp, InstEmit.Fcvtps_Gp, OpCodeSimdCvt.Create);
+ SetA64("x00111100x101001000000xxxxxxxxxx", InstName.Fcvtpu_Gp, InstEmit.Fcvtpu_Gp, OpCodeSimdCvt.Create);
+ SetA64("x00111100x111000000000xxxxxxxxxx", InstName.Fcvtzs_Gp, InstEmit.Fcvtzs_Gp, OpCodeSimdCvt.Create);
+ SetA64(">00111100x011000>xxxxxxxxxxxxxxx", InstName.Fcvtzs_Gp_Fixed, InstEmit.Fcvtzs_Gp_Fixed, OpCodeSimdCvt.Create);
+ SetA64("010111101x100001101110xxxxxxxxxx", InstName.Fcvtzs_S, InstEmit.Fcvtzs_S, OpCodeSimd.Create);
+ SetA64("0>0011101<100001101110xxxxxxxxxx", InstName.Fcvtzs_V, InstEmit.Fcvtzs_V, OpCodeSimd.Create);
+ SetA64("0x001111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzs_V_Fixed, InstEmit.Fcvtzs_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzs_V_Fixed, InstEmit.Fcvtzs_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("x00111100x111001000000xxxxxxxxxx", InstName.Fcvtzu_Gp, InstEmit.Fcvtzu_Gp, OpCodeSimdCvt.Create);
+ SetA64(">00111100x011001>xxxxxxxxxxxxxxx", InstName.Fcvtzu_Gp_Fixed, InstEmit.Fcvtzu_Gp_Fixed, OpCodeSimdCvt.Create);
+ SetA64("011111101x100001101110xxxxxxxxxx", InstName.Fcvtzu_S, InstEmit.Fcvtzu_S, OpCodeSimd.Create);
+ SetA64("0>1011101<100001101110xxxxxxxxxx", InstName.Fcvtzu_V, InstEmit.Fcvtzu_V, OpCodeSimd.Create);
+ SetA64("0x101111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed, InstEmit.Fcvtzu_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed, InstEmit.Fcvtzu_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("000111100x1xxxxx000110xxxxxxxxxx", InstName.Fdiv_S, InstEmit.Fdiv_S, OpCodeSimdReg.Create);
+ SetA64("0>1011100<1xxxxx111111xxxxxxxxxx", InstName.Fdiv_V, InstEmit.Fdiv_V, OpCodeSimdReg.Create);
+ SetA64("000111110x0xxxxx0xxxxxxxxxxxxxxx", InstName.Fmadd_S, InstEmit.Fmadd_S, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx010010xxxxxxxxxx", InstName.Fmax_S, InstEmit.Fmax_S, OpCodeSimdReg.Create);
+ SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V, InstEmit.Fmax_V, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S, InstEmit.Fmaxnm_S, OpCodeSimdReg.Create);
+ SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V, InstEmit.Fmaxnm_V, OpCodeSimdReg.Create);
+ SetA64("011111100x110000110010xxxxxxxxxx", InstName.Fmaxnmp_S, InstEmit.Fmaxnmp_S, OpCodeSimd.Create);
+ SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, OpCodeSimdReg.Create);
+ SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, OpCodeSimd.Create);
+ SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, OpCodeSimdReg.Create);
+ SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V, InstEmit.Fmaxv_V, OpCodeSimd.Create);
+ SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, OpCodeSimdReg.Create);
+ SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V, InstEmit.Fmin_V, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S, InstEmit.Fminnm_S, OpCodeSimdReg.Create);
+ SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V, InstEmit.Fminnm_V, OpCodeSimdReg.Create);
+ SetA64("011111101x110000110010xxxxxxxxxx", InstName.Fminnmp_S, InstEmit.Fminnmp_S, OpCodeSimd.Create);
+ SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, OpCodeSimdReg.Create);
+ SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, OpCodeSimd.Create);
+ SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, OpCodeSimdReg.Create);
+ SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V, InstEmit.Fminv_V, OpCodeSimd.Create);
+ SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, OpCodeSimdRegElemF.Create);
+ SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", InstName.Fmla_V, InstEmit.Fmla_V, OpCodeSimdReg.Create);
+ SetA64("0>0011111<xxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Ve, InstEmit.Fmla_Ve, OpCodeSimdRegElemF.Create);
+ SetA64("010111111xxxxxxx0101x0xxxxxxxxxx", InstName.Fmls_Se, InstEmit.Fmls_Se, OpCodeSimdRegElemF.Create);
+ SetA64("0>0011101<1xxxxx110011xxxxxxxxxx", InstName.Fmls_V, InstEmit.Fmls_V, OpCodeSimdReg.Create);
+ SetA64("0>0011111<xxxxxx0101x0xxxxxxxxxx", InstName.Fmls_Ve, InstEmit.Fmls_Ve, OpCodeSimdRegElemF.Create);
+ SetA64("000111100x100000010000xxxxxxxxxx", InstName.Fmov_S, InstEmit.Fmov_S, OpCodeSimd.Create);
+ SetA64("000111100x1xxxxxxxx10000000xxxxx", InstName.Fmov_Si, InstEmit.Fmov_Si, OpCodeSimdFmov.Create);
+ SetA64("0x00111100000xxx111101xxxxxxxxxx", InstName.Fmov_Vi, InstEmit.Fmov_Vi, OpCodeSimdImm.Create);
+ SetA64("0110111100000xxx111101xxxxxxxxxx", InstName.Fmov_Vi, InstEmit.Fmov_Vi, OpCodeSimdImm.Create);
+ SetA64("0001111000100110000000xxxxxxxxxx", InstName.Fmov_Ftoi, InstEmit.Fmov_Ftoi, OpCodeSimd.Create);
+ SetA64("1001111001100110000000xxxxxxxxxx", InstName.Fmov_Ftoi, InstEmit.Fmov_Ftoi, OpCodeSimd.Create);
+ SetA64("0001111000100111000000xxxxxxxxxx", InstName.Fmov_Itof, InstEmit.Fmov_Itof, OpCodeSimd.Create);
+ SetA64("1001111001100111000000xxxxxxxxxx", InstName.Fmov_Itof, InstEmit.Fmov_Itof, OpCodeSimd.Create);
+ SetA64("1001111010101110000000xxxxxxxxxx", InstName.Fmov_Ftoi1, InstEmit.Fmov_Ftoi1, OpCodeSimd.Create);
+ SetA64("1001111010101111000000xxxxxxxxxx", InstName.Fmov_Itof1, InstEmit.Fmov_Itof1, OpCodeSimd.Create);
+ SetA64("000111110x0xxxxx1xxxxxxxxxxxxxxx", InstName.Fmsub_S, InstEmit.Fmsub_S, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx000010xxxxxxxxxx", InstName.Fmul_S, InstEmit.Fmul_S, OpCodeSimdReg.Create);
+ SetA64("010111111xxxxxxx1001x0xxxxxxxxxx", InstName.Fmul_Se, InstEmit.Fmul_Se, OpCodeSimdRegElemF.Create);
+ SetA64("0>1011100<1xxxxx110111xxxxxxxxxx", InstName.Fmul_V, InstEmit.Fmul_V, OpCodeSimdReg.Create);
+ SetA64("0>0011111<xxxxxx1001x0xxxxxxxxxx", InstName.Fmul_Ve, InstEmit.Fmul_Ve, OpCodeSimdRegElemF.Create);
+ SetA64("010111100x1xxxxx110111xxxxxxxxxx", InstName.Fmulx_S, InstEmit.Fmulx_S, OpCodeSimdReg.Create);
+ SetA64("011111111xxxxxxx1001x0xxxxxxxxxx", InstName.Fmulx_Se, InstEmit.Fmulx_Se, OpCodeSimdRegElemF.Create);
+ SetA64("0>0011100<1xxxxx110111xxxxxxxxxx", InstName.Fmulx_V, InstEmit.Fmulx_V, OpCodeSimdReg.Create);
+ SetA64("0>1011111<xxxxxx1001x0xxxxxxxxxx", InstName.Fmulx_Ve, InstEmit.Fmulx_Ve, OpCodeSimdRegElemF.Create);
+ SetA64("000111100x100001010000xxxxxxxxxx", InstName.Fneg_S, InstEmit.Fneg_S, OpCodeSimd.Create);
+ SetA64("0>1011101<100000111110xxxxxxxxxx", InstName.Fneg_V, InstEmit.Fneg_V, OpCodeSimd.Create);
+ SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", InstName.Fnmadd_S, InstEmit.Fnmadd_S, OpCodeSimdReg.Create);
+ SetA64("000111110x1xxxxx1xxxxxxxxxxxxxxx", InstName.Fnmsub_S, InstEmit.Fnmsub_S, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx100010xxxxxxxxxx", InstName.Fnmul_S, InstEmit.Fnmul_S, OpCodeSimdReg.Create);
+ SetA64("010111101x100001110110xxxxxxxxxx", InstName.Frecpe_S, InstEmit.Frecpe_S, OpCodeSimd.Create);
+ SetA64("0>0011101<100001110110xxxxxxxxxx", InstName.Frecpe_V, InstEmit.Frecpe_V, OpCodeSimd.Create);
+ SetA64("010111100x1xxxxx111111xxxxxxxxxx", InstName.Frecps_S, InstEmit.Frecps_S, OpCodeSimdReg.Create);
+ SetA64("0>0011100<1xxxxx111111xxxxxxxxxx", InstName.Frecps_V, InstEmit.Frecps_V, OpCodeSimdReg.Create);
+ SetA64("010111101x100001111110xxxxxxxxxx", InstName.Frecpx_S, InstEmit.Frecpx_S, OpCodeSimd.Create);
+ SetA64("000111100x100110010000xxxxxxxxxx", InstName.Frinta_S, InstEmit.Frinta_S, OpCodeSimd.Create);
+ SetA64("0>1011100<100001100010xxxxxxxxxx", InstName.Frinta_V, InstEmit.Frinta_V, OpCodeSimd.Create);
+ SetA64("000111100x100111110000xxxxxxxxxx", InstName.Frinti_S, InstEmit.Frinti_S, OpCodeSimd.Create);
+ SetA64("0>1011101<100001100110xxxxxxxxxx", InstName.Frinti_V, InstEmit.Frinti_V, OpCodeSimd.Create);
+ SetA64("000111100x100101010000xxxxxxxxxx", InstName.Frintm_S, InstEmit.Frintm_S, OpCodeSimd.Create);
+ SetA64("0>0011100<100001100110xxxxxxxxxx", InstName.Frintm_V, InstEmit.Frintm_V, OpCodeSimd.Create);
+ SetA64("000111100x100100010000xxxxxxxxxx", InstName.Frintn_S, InstEmit.Frintn_S, OpCodeSimd.Create);
+ SetA64("0>0011100<100001100010xxxxxxxxxx", InstName.Frintn_V, InstEmit.Frintn_V, OpCodeSimd.Create);
+ SetA64("000111100x100100110000xxxxxxxxxx", InstName.Frintp_S, InstEmit.Frintp_S, OpCodeSimd.Create);
+ SetA64("0>0011101<100001100010xxxxxxxxxx", InstName.Frintp_V, InstEmit.Frintp_V, OpCodeSimd.Create);
+ SetA64("000111100x100111010000xxxxxxxxxx", InstName.Frintx_S, InstEmit.Frintx_S, OpCodeSimd.Create);
+ SetA64("0>1011100<100001100110xxxxxxxxxx", InstName.Frintx_V, InstEmit.Frintx_V, OpCodeSimd.Create);
+ SetA64("000111100x100101110000xxxxxxxxxx", InstName.Frintz_S, InstEmit.Frintz_S, OpCodeSimd.Create);
+ SetA64("0>0011101<100001100110xxxxxxxxxx", InstName.Frintz_V, InstEmit.Frintz_V, OpCodeSimd.Create);
+ SetA64("011111101x100001110110xxxxxxxxxx", InstName.Frsqrte_S, InstEmit.Frsqrte_S, OpCodeSimd.Create);
+ SetA64("0>1011101<100001110110xxxxxxxxxx", InstName.Frsqrte_V, InstEmit.Frsqrte_V, OpCodeSimd.Create);
+ SetA64("010111101x1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_S, InstEmit.Frsqrts_S, OpCodeSimdReg.Create);
+ SetA64("0>0011101<1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_V, InstEmit.Frsqrts_V, OpCodeSimdReg.Create);
+ SetA64("000111100x100001110000xxxxxxxxxx", InstName.Fsqrt_S, InstEmit.Fsqrt_S, OpCodeSimd.Create);
+ SetA64("0>1011101<100001111110xxxxxxxxxx", InstName.Fsqrt_V, InstEmit.Fsqrt_V, OpCodeSimd.Create);
+ SetA64("000111100x1xxxxx001110xxxxxxxxxx", InstName.Fsub_S, InstEmit.Fsub_S, OpCodeSimdReg.Create);
+ SetA64("0>0011101<1xxxxx110101xxxxxxxxxx", InstName.Fsub_V, InstEmit.Fsub_V, OpCodeSimdReg.Create);
+ SetA64("01001110000xxxxx000111xxxxxxxxxx", InstName.Ins_Gp, InstEmit.Ins_Gp, OpCodeSimdIns.Create);
+ SetA64("01101110000xxxxx0xxxx1xxxxxxxxxx", InstName.Ins_V, InstEmit.Ins_V, OpCodeSimdIns.Create);
+ SetA64("0x00110001000000xxxxxxxxxxxxxxxx", InstName.Ld__Vms, InstEmit.Ld__Vms, OpCodeSimdMemMs.Create);
+ SetA64("0x001100110xxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vms, InstEmit.Ld__Vms, OpCodeSimdMemMs.Create);
+ SetA64("0x00110101x00000xxxxxxxxxxxxxxxx", InstName.Ld__Vss, InstEmit.Ld__Vss, OpCodeSimdMemSs.Create);
+ SetA64("0x00110111xxxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vss, InstEmit.Ld__Vss, OpCodeSimdMemSs.Create);
+ SetA64("<<10110xx1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldp, InstEmit.Ldp, OpCodeSimdMemPair.Create);
+ SetA64("xx111100x10xxxxxxxxx00xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemImm.Create);
+ SetA64("xx111100x10xxxxxxxxx01xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemImm.Create);
+ SetA64("xx111100x10xxxxxxxxx11xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemImm.Create);
+ SetA64("xx111101x1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemImm.Create);
+ SetA64("xx111100x11xxxxxx1xx10xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemReg.Create);
+ SetA64("xx011100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal, InstEmit.Ldr_Literal, OpCodeSimdMemLit.Create);
+ SetA64("0x001110<<1xxxxx100101xxxxxxxxxx", InstName.Mla_V, InstEmit.Mla_V, OpCodeSimdReg.Create);
+ SetA64("0x101111xxxxxxxx0000x0xxxxxxxxxx", InstName.Mla_Ve, InstEmit.Mla_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x101110<<1xxxxx100101xxxxxxxxxx", InstName.Mls_V, InstEmit.Mls_V, OpCodeSimdReg.Create);
+ SetA64("0x101111xxxxxxxx0100x0xxxxxxxxxx", InstName.Mls_Ve, InstEmit.Mls_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x00111100000xxx0xx001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, OpCodeSimdImm.Create);
+ SetA64("0x00111100000xxx10x001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, OpCodeSimdImm.Create);
+ SetA64("0x00111100000xxx110x01xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, OpCodeSimdImm.Create);
+ SetA64("0xx0111100000xxx111001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, OpCodeSimdImm.Create);
+ SetA64("0x001110<<1xxxxx100111xxxxxxxxxx", InstName.Mul_V, InstEmit.Mul_V, OpCodeSimdReg.Create);
+ SetA64("0x001111xxxxxxxx1000x0xxxxxxxxxx", InstName.Mul_Ve, InstEmit.Mul_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x10111100000xxx0xx001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, OpCodeSimdImm.Create);
+ SetA64("0x10111100000xxx10x001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, OpCodeSimdImm.Create);
+ SetA64("0x10111100000xxx110x01xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, OpCodeSimdImm.Create);
+ SetA64("0111111011100000101110xxxxxxxxxx", InstName.Neg_S, InstEmit.Neg_S, OpCodeSimd.Create);
+ SetA64("0>101110<<100000101110xxxxxxxxxx", InstName.Neg_V, InstEmit.Neg_V, OpCodeSimd.Create);
+ SetA64("0x10111000100000010110xxxxxxxxxx", InstName.Not_V, InstEmit.Not_V, OpCodeSimd.Create);
+ SetA64("0x001110111xxxxx000111xxxxxxxxxx", InstName.Orn_V, InstEmit.Orn_V, OpCodeSimdReg.Create);
+ SetA64("0x001110101xxxxx000111xxxxxxxxxx", InstName.Orr_V, InstEmit.Orr_V, OpCodeSimdReg.Create);
+ SetA64("0x00111100000xxx0xx101xxxxxxxxxx", InstName.Orr_Vi, InstEmit.Orr_Vi, OpCodeSimdImm.Create);
+ SetA64("0x00111100000xxx10x101xxxxxxxxxx", InstName.Orr_Vi, InstEmit.Orr_Vi, OpCodeSimdImm.Create);
+ SetA64("0x001110001xxxxx111000xxxxxxxxxx", InstName.Pmull_V, InstEmit.Pmull_V, OpCodeSimdReg.Create);
+ SetA64("0x001110111xxxxx111000xxxxxxxxxx", InstName.Pmull_V, InstEmit.Pmull_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx010000xxxxxxxxxx", InstName.Raddhn_V, InstEmit.Raddhn_V, OpCodeSimdReg.Create);
+ SetA64("0x10111001100000010110xxxxxxxxxx", InstName.Rbit_V, InstEmit.Rbit_V, OpCodeSimd.Create);
+ SetA64("0x00111000100000000110xxxxxxxxxx", InstName.Rev16_V, InstEmit.Rev16_V, OpCodeSimd.Create);
+ SetA64("0x1011100x100000000010xxxxxxxxxx", InstName.Rev32_V, InstEmit.Rev32_V, OpCodeSimd.Create);
+ SetA64("0x001110<<100000000010xxxxxxxxxx", InstName.Rev64_V, InstEmit.Rev64_V, OpCodeSimd.Create);
+ SetA64("0x00111100>>>xxx100011xxxxxxxxxx", InstName.Rshrn_V, InstEmit.Rshrn_V, OpCodeSimdShImm.Create);
+ SetA64("0x101110<<1xxxxx011000xxxxxxxxxx", InstName.Rsubhn_V, InstEmit.Rsubhn_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx011111xxxxxxxxxx", InstName.Saba_V, InstEmit.Saba_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx010100xxxxxxxxxx", InstName.Sabal_V, InstEmit.Sabal_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx011101xxxxxxxxxx", InstName.Sabd_V, InstEmit.Sabd_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx011100xxxxxxxxxx", InstName.Sabdl_V, InstEmit.Sabdl_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<100000011010xxxxxxxxxx", InstName.Sadalp_V, InstEmit.Sadalp_V, OpCodeSimd.Create);
+ SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstName.Saddl_V, InstEmit.Saddl_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<100000001010xxxxxxxxxx", InstName.Saddlp_V, InstEmit.Saddlp_V, OpCodeSimd.Create);
+ SetA64("000011100x110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, OpCodeSimd.Create);
+ SetA64("01001110<<110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, OpCodeSimd.Create);
+ SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstName.Saddw_V, InstEmit.Saddw_V, OpCodeSimdReg.Create);
+ SetA64("x00111100x100010000000xxxxxxxxxx", InstName.Scvtf_Gp, InstEmit.Scvtf_Gp, OpCodeSimdCvt.Create);
+ SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstName.Scvtf_Gp_Fixed, InstEmit.Scvtf_Gp_Fixed, OpCodeSimdCvt.Create);
+ SetA64("010111100x100001110110xxxxxxxxxx", InstName.Scvtf_S, InstEmit.Scvtf_S, OpCodeSimd.Create);
+ SetA64("010111110>>xxxxx111001xxxxxxxxxx", InstName.Scvtf_S_Fixed, InstEmit.Scvtf_S_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0>0011100<100001110110xxxxxxxxxx", InstName.Scvtf_V, InstEmit.Scvtf_V, OpCodeSimd.Create);
+ SetA64("0x001111001xxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed, InstEmit.Scvtf_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed, InstEmit.Scvtf_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("01011110000xxxxx000000xxxxxxxxxx", InstName.Sha1c_V, InstEmit.Sha1c_V, OpCodeSimdReg.Create);
+ SetA64("0101111000101000000010xxxxxxxxxx", InstName.Sha1h_V, InstEmit.Sha1h_V, OpCodeSimd.Create);
+ SetA64("01011110000xxxxx001000xxxxxxxxxx", InstName.Sha1m_V, InstEmit.Sha1m_V, OpCodeSimdReg.Create);
+ SetA64("01011110000xxxxx000100xxxxxxxxxx", InstName.Sha1p_V, InstEmit.Sha1p_V, OpCodeSimdReg.Create);
+ SetA64("01011110000xxxxx001100xxxxxxxxxx", InstName.Sha1su0_V, InstEmit.Sha1su0_V, OpCodeSimdReg.Create);
+ SetA64("0101111000101000000110xxxxxxxxxx", InstName.Sha1su1_V, InstEmit.Sha1su1_V, OpCodeSimd.Create);
+ SetA64("01011110000xxxxx010000xxxxxxxxxx", InstName.Sha256h_V, InstEmit.Sha256h_V, OpCodeSimdReg.Create);
+ SetA64("01011110000xxxxx010100xxxxxxxxxx", InstName.Sha256h2_V, InstEmit.Sha256h2_V, OpCodeSimdReg.Create);
+ SetA64("0101111000101000001010xxxxxxxxxx", InstName.Sha256su0_V, InstEmit.Sha256su0_V, OpCodeSimd.Create);
+ SetA64("01011110000xxxxx011000xxxxxxxxxx", InstName.Sha256su1_V, InstEmit.Sha256su1_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", InstName.Shadd_V, InstEmit.Shadd_V, OpCodeSimdReg.Create);
+ SetA64("0101111101xxxxxx010101xxxxxxxxxx", InstName.Shl_S, InstEmit.Shl_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, OpCodeSimdShImm.Create);
+ SetA64("0x101110<<100001001110xxxxxxxxxx", InstName.Shll_V, InstEmit.Shll_V, OpCodeSimd.Create);
+ SetA64("0x00111100>>>xxx100001xxxxxxxxxx", InstName.Shrn_V, InstEmit.Shrn_V, OpCodeSimdShImm.Create);
+ SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", InstName.Shsub_V, InstEmit.Shsub_V, OpCodeSimdReg.Create);
+ SetA64("0111111101xxxxxx010101xxxxxxxxxx", InstName.Sli_S, InstEmit.Sli_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, OpCodeSimdShImm.Create);
+ SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", InstName.Smax_V, InstEmit.Smax_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx101001xxxxxxxxxx", InstName.Smaxp_V, InstEmit.Smaxp_V, OpCodeSimdReg.Create);
+ SetA64("000011100x110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, OpCodeSimd.Create);
+ SetA64("01001110<<110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, OpCodeSimd.Create);
+ SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", InstName.Smin_V, InstEmit.Smin_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", InstName.Sminp_V, InstEmit.Sminp_V, OpCodeSimdReg.Create);
+ SetA64("000011100x110001101010xxxxxxxxxx", InstName.Sminv_V, InstEmit.Sminv_V, OpCodeSimd.Create);
+ SetA64("01001110<<110001101010xxxxxxxxxx", InstName.Sminv_V, InstEmit.Sminv_V, OpCodeSimd.Create);
+ SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", InstName.Smlal_V, InstEmit.Smlal_V, OpCodeSimdReg.Create);
+ SetA64("0x001111xxxxxxxx0010x0xxxxxxxxxx", InstName.Smlal_Ve, InstEmit.Smlal_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", InstName.Smlsl_V, InstEmit.Smlsl_V, OpCodeSimdReg.Create);
+ SetA64("0x001111xxxxxxxx0110x0xxxxxxxxxx", InstName.Smlsl_Ve, InstEmit.Smlsl_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x001110000xxxxx001011xxxxxxxxxx", InstName.Smov_S, InstEmit.Smov_S, OpCodeSimdIns.Create);
+ SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", InstName.Smull_V, InstEmit.Smull_V, OpCodeSimdReg.Create);
+ SetA64("0x001111xxxxxxxx1010x0xxxxxxxxxx", InstName.Smull_Ve, InstEmit.Smull_Ve, OpCodeSimdRegElem.Create);
+ SetA64("01011110xx100000011110xxxxxxxxxx", InstName.Sqabs_S, InstEmit.Sqabs_S, OpCodeSimd.Create);
+ SetA64("0>001110<<100000011110xxxxxxxxxx", InstName.Sqabs_V, InstEmit.Sqabs_V, OpCodeSimd.Create);
+ SetA64("01011110xx1xxxxx000011xxxxxxxxxx", InstName.Sqadd_S, InstEmit.Sqadd_S, OpCodeSimdReg.Create);
+ SetA64("0>001110<<1xxxxx000011xxxxxxxxxx", InstName.Sqadd_V, InstEmit.Sqadd_V, OpCodeSimdReg.Create);
+ SetA64("01011110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, InstEmit.Sqdmulh_S, OpCodeSimdReg.Create);
+ SetA64("01011110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, InstEmit.Sqdmulh_S, OpCodeSimdReg.Create);
+ SetA64("0x001110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, InstEmit.Sqdmulh_V, OpCodeSimdReg.Create);
+ SetA64("0x001110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, InstEmit.Sqdmulh_V, OpCodeSimdReg.Create);
+ SetA64("0x00111101xxxxxx1100x0xxxxxxxxxx", InstName.Sqdmulh_Ve, InstEmit.Sqdmulh_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x00111110xxxxxx1100x0xxxxxxxxxx", InstName.Sqdmulh_Ve, InstEmit.Sqdmulh_Ve, OpCodeSimdRegElem.Create);
+ SetA64("01111110xx100000011110xxxxxxxxxx", InstName.Sqneg_S, InstEmit.Sqneg_S, OpCodeSimd.Create);
+ SetA64("0>101110<<100000011110xxxxxxxxxx", InstName.Sqneg_V, InstEmit.Sqneg_V, OpCodeSimd.Create);
+ SetA64("01111110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, OpCodeSimdReg.Create);
+ SetA64("01111110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, OpCodeSimdReg.Create);
+ SetA64("0x101110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, OpCodeSimdReg.Create);
+ SetA64("0x101110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, OpCodeSimdReg.Create);
+ SetA64("0x00111101xxxxxx1101x0xxxxxxxxxx", InstName.Sqrdmulh_Ve, InstEmit.Sqrdmulh_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x00111110xxxxxx1101x0xxxxxxxxxx", InstName.Sqrdmulh_Ve, InstEmit.Sqrdmulh_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0>001110<<1xxxxx010111xxxxxxxxxx", InstName.Sqrshl_V, InstEmit.Sqrshl_V, OpCodeSimdReg.Create);
+ SetA64("0101111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_S, InstEmit.Sqrshrn_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_V, InstEmit.Sqrshrn_V, OpCodeSimdShImm.Create);
+ SetA64("0111111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_S, InstEmit.Sqrshrun_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_V, InstEmit.Sqrshrun_V, OpCodeSimdShImm.Create);
+ SetA64("0>001110<<1xxxxx010011xxxxxxxxxx", InstName.Sqshl_V, InstEmit.Sqshl_V, OpCodeSimdReg.Create);
+ SetA64("0101111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_S, InstEmit.Sqshrn_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_V, InstEmit.Sqshrn_V, OpCodeSimdShImm.Create);
+ SetA64("0111111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_S, InstEmit.Sqshrun_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_V, InstEmit.Sqshrun_V, OpCodeSimdShImm.Create);
+ SetA64("01011110xx1xxxxx001011xxxxxxxxxx", InstName.Sqsub_S, InstEmit.Sqsub_S, OpCodeSimdReg.Create);
+ SetA64("0>001110<<1xxxxx001011xxxxxxxxxx", InstName.Sqsub_V, InstEmit.Sqsub_V, OpCodeSimdReg.Create);
+ SetA64("01011110<<100001010010xxxxxxxxxx", InstName.Sqxtn_S, InstEmit.Sqxtn_S, OpCodeSimd.Create);
+ SetA64("0x001110<<100001010010xxxxxxxxxx", InstName.Sqxtn_V, InstEmit.Sqxtn_V, OpCodeSimd.Create);
+ SetA64("01111110<<100001001010xxxxxxxxxx", InstName.Sqxtun_S, InstEmit.Sqxtun_S, OpCodeSimd.Create);
+ SetA64("0x101110<<100001001010xxxxxxxxxx", InstName.Sqxtun_V, InstEmit.Sqxtun_V, OpCodeSimd.Create);
+ SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", InstName.Srhadd_V, InstEmit.Srhadd_V, OpCodeSimdReg.Create);
+ SetA64("0111111101xxxxxx010001xxxxxxxxxx", InstName.Sri_S, InstEmit.Sri_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx010001xxxxxxxxxx", InstName.Sri_V, InstEmit.Sri_V, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx010001xxxxxxxxxx", InstName.Sri_V, InstEmit.Sri_V, OpCodeSimdShImm.Create);
+ SetA64("0>001110<<1xxxxx010101xxxxxxxxxx", InstName.Srshl_V, InstEmit.Srshl_V, OpCodeSimdReg.Create);
+ SetA64("0101111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_S, InstEmit.Srshr_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, OpCodeSimdShImm.Create);
+ SetA64("0101111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_S, InstEmit.Srsra_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, OpCodeSimdShImm.Create);
+ SetA64("01011110111xxxxx010001xxxxxxxxxx", InstName.Sshl_S, InstEmit.Sshl_S, OpCodeSimdReg.Create);
+ SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", InstName.Sshl_V, InstEmit.Sshl_V, OpCodeSimdReg.Create);
+ SetA64("0x00111100>>>xxx101001xxxxxxxxxx", InstName.Sshll_V, InstEmit.Sshll_V, OpCodeSimdShImm.Create);
+ SetA64("0101111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_S, InstEmit.Sshr_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx000001xxxxxxxxxx", InstName.Sshr_V, InstEmit.Sshr_V, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_V, InstEmit.Sshr_V, OpCodeSimdShImm.Create);
+ SetA64("0101111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_S, InstEmit.Ssra_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx000101xxxxxxxxxx", InstName.Ssra_V, InstEmit.Ssra_V, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_V, InstEmit.Ssra_V, OpCodeSimdShImm.Create);
+ SetA64("0x001110<<1xxxxx001000xxxxxxxxxx", InstName.Ssubl_V, InstEmit.Ssubl_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx001100xxxxxxxxxx", InstName.Ssubw_V, InstEmit.Ssubw_V, OpCodeSimdReg.Create);
+ SetA64("0x00110000000000xxxxxxxxxxxxxxxx", InstName.St__Vms, InstEmit.St__Vms, OpCodeSimdMemMs.Create);
+ SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", InstName.St__Vms, InstEmit.St__Vms, OpCodeSimdMemMs.Create);
+ SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", InstName.St__Vss, InstEmit.St__Vss, OpCodeSimdMemSs.Create);
+ SetA64("0x00110110xxxxxxxxxxxxxxxxxxxxxx", InstName.St__Vss, InstEmit.St__Vss, OpCodeSimdMemSs.Create);
+ SetA64("<<10110xx0xxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, InstEmit.Stp, OpCodeSimdMemPair.Create);
+ SetA64("xx111100x00xxxxxxxxx00xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemImm.Create);
+ SetA64("xx111100x00xxxxxxxxx01xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemImm.Create);
+ SetA64("xx111100x00xxxxxxxxx11xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemImm.Create);
+ SetA64("xx111101x0xxxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemImm.Create);
+ SetA64("xx111100x01xxxxxx1xx10xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemReg.Create);
+ SetA64("01111110111xxxxx100001xxxxxxxxxx", InstName.Sub_S, InstEmit.Sub_S, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx100001xxxxxxxxxx", InstName.Sub_V, InstEmit.Sub_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx011000xxxxxxxxxx", InstName.Subhn_V, InstEmit.Subhn_V, OpCodeSimdReg.Create);
+ SetA64("01011110xx100000001110xxxxxxxxxx", InstName.Suqadd_S, InstEmit.Suqadd_S, OpCodeSimd.Create);
+ SetA64("0>001110<<100000001110xxxxxxxxxx", InstName.Suqadd_V, InstEmit.Suqadd_V, OpCodeSimd.Create);
+ SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", InstName.Tbl_V, InstEmit.Tbl_V, OpCodeSimdTbl.Create);
+ SetA64("0x001110000xxxxx0xx100xxxxxxxxxx", InstName.Tbx_V, InstEmit.Tbx_V, OpCodeSimdTbl.Create);
+ SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", InstName.Trn1_V, InstEmit.Trn1_V, OpCodeSimdReg.Create);
+ SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", InstName.Trn2_V, InstEmit.Trn2_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx011111xxxxxxxxxx", InstName.Uaba_V, InstEmit.Uaba_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx010100xxxxxxxxxx", InstName.Uabal_V, InstEmit.Uabal_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx011101xxxxxxxxxx", InstName.Uabd_V, InstEmit.Uabd_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx011100xxxxxxxxxx", InstName.Uabdl_V, InstEmit.Uabdl_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<100000011010xxxxxxxxxx", InstName.Uadalp_V, InstEmit.Uadalp_V, OpCodeSimd.Create);
+ SetA64("0x101110<<1xxxxx000000xxxxxxxxxx", InstName.Uaddl_V, InstEmit.Uaddl_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<100000001010xxxxxxxxxx", InstName.Uaddlp_V, InstEmit.Uaddlp_V, OpCodeSimd.Create);
+ SetA64("001011100x110000001110xxxxxxxxxx", InstName.Uaddlv_V, InstEmit.Uaddlv_V, OpCodeSimd.Create);
+ SetA64("01101110<<110000001110xxxxxxxxxx", InstName.Uaddlv_V, InstEmit.Uaddlv_V, OpCodeSimd.Create);
+ SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstName.Uaddw_V, InstEmit.Uaddw_V, OpCodeSimdReg.Create);
+ SetA64("x00111100x100011000000xxxxxxxxxx", InstName.Ucvtf_Gp, InstEmit.Ucvtf_Gp, OpCodeSimdCvt.Create);
+ SetA64(">00111100x000011>xxxxxxxxxxxxxxx", InstName.Ucvtf_Gp_Fixed, InstEmit.Ucvtf_Gp_Fixed, OpCodeSimdCvt.Create);
+ SetA64("011111100x100001110110xxxxxxxxxx", InstName.Ucvtf_S, InstEmit.Ucvtf_S, OpCodeSimd.Create);
+ SetA64("011111110>>xxxxx111001xxxxxxxxxx", InstName.Ucvtf_S_Fixed, InstEmit.Ucvtf_S_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0>1011100<100001110110xxxxxxxxxx", InstName.Ucvtf_V, InstEmit.Ucvtf_V, OpCodeSimd.Create);
+ SetA64("0x101111001xxxxx111001xxxxxxxxxx", InstName.Ucvtf_V_Fixed, InstEmit.Ucvtf_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx111001xxxxxxxxxx", InstName.Ucvtf_V_Fixed, InstEmit.Ucvtf_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstName.Uhadd_V, InstEmit.Uhadd_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstName.Uhsub_V, InstEmit.Uhsub_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstName.Umax_V, InstEmit.Umax_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx101001xxxxxxxxxx", InstName.Umaxp_V, InstEmit.Umaxp_V, OpCodeSimdReg.Create);
+ SetA64("001011100x110000101010xxxxxxxxxx", InstName.Umaxv_V, InstEmit.Umaxv_V, OpCodeSimd.Create);
+ SetA64("01101110<<110000101010xxxxxxxxxx", InstName.Umaxv_V, InstEmit.Umaxv_V, OpCodeSimd.Create);
+ SetA64("0x101110<<1xxxxx011011xxxxxxxxxx", InstName.Umin_V, InstEmit.Umin_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx101011xxxxxxxxxx", InstName.Uminp_V, InstEmit.Uminp_V, OpCodeSimdReg.Create);
+ SetA64("001011100x110001101010xxxxxxxxxx", InstName.Uminv_V, InstEmit.Uminv_V, OpCodeSimd.Create);
+ SetA64("01101110<<110001101010xxxxxxxxxx", InstName.Uminv_V, InstEmit.Uminv_V, OpCodeSimd.Create);
+ SetA64("0x101110<<1xxxxx100000xxxxxxxxxx", InstName.Umlal_V, InstEmit.Umlal_V, OpCodeSimdReg.Create);
+ SetA64("0x101111xxxxxxxx0010x0xxxxxxxxxx", InstName.Umlal_Ve, InstEmit.Umlal_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x101110<<1xxxxx101000xxxxxxxxxx", InstName.Umlsl_V, InstEmit.Umlsl_V, OpCodeSimdReg.Create);
+ SetA64("0x101111xxxxxxxx0110x0xxxxxxxxxx", InstName.Umlsl_Ve, InstEmit.Umlsl_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x001110000xxxxx001111xxxxxxxxxx", InstName.Umov_S, InstEmit.Umov_S, OpCodeSimdIns.Create);
+ SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", InstName.Umull_V, InstEmit.Umull_V, OpCodeSimdReg.Create);
+ SetA64("0x101111xxxxxxxx1010x0xxxxxxxxxx", InstName.Umull_Ve, InstEmit.Umull_Ve, OpCodeSimdRegElem.Create);
+ SetA64("01111110xx1xxxxx000011xxxxxxxxxx", InstName.Uqadd_S, InstEmit.Uqadd_S, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", InstName.Uqadd_V, InstEmit.Uqadd_V, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx010111xxxxxxxxxx", InstName.Uqrshl_V, InstEmit.Uqrshl_V, OpCodeSimdReg.Create);
+ SetA64("0111111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_S, InstEmit.Uqrshrn_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_V, InstEmit.Uqrshrn_V, OpCodeSimdShImm.Create);
+ SetA64("0>101110<<1xxxxx010011xxxxxxxxxx", InstName.Uqshl_V, InstEmit.Uqshl_V, OpCodeSimdReg.Create);
+ SetA64("0111111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_S, InstEmit.Uqshrn_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_V, InstEmit.Uqshrn_V, OpCodeSimdShImm.Create);
+ SetA64("01111110xx1xxxxx001011xxxxxxxxxx", InstName.Uqsub_S, InstEmit.Uqsub_S, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", InstName.Uqsub_V, InstEmit.Uqsub_V, OpCodeSimdReg.Create);
+ SetA64("01111110<<100001010010xxxxxxxxxx", InstName.Uqxtn_S, InstEmit.Uqxtn_S, OpCodeSimd.Create);
+ SetA64("0x101110<<100001010010xxxxxxxxxx", InstName.Uqxtn_V, InstEmit.Uqxtn_V, OpCodeSimd.Create);
+ SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", InstName.Urhadd_V, InstEmit.Urhadd_V, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx010101xxxxxxxxxx", InstName.Urshl_V, InstEmit.Urshl_V, OpCodeSimdReg.Create);
+ SetA64("0111111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_S, InstEmit.Urshr_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx001001xxxxxxxxxx", InstName.Urshr_V, InstEmit.Urshr_V, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_V, InstEmit.Urshr_V, OpCodeSimdShImm.Create);
+ SetA64("0111111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_S, InstEmit.Ursra_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, OpCodeSimdShImm.Create);
+ SetA64("01111110111xxxxx010001xxxxxxxxxx", InstName.Ushl_S, InstEmit.Ushl_S, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", InstName.Ushl_V, InstEmit.Ushl_V, OpCodeSimdReg.Create);
+ SetA64("0x10111100>>>xxx101001xxxxxxxxxx", InstName.Ushll_V, InstEmit.Ushll_V, OpCodeSimdShImm.Create);
+ SetA64("0111111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_S, InstEmit.Ushr_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx000001xxxxxxxxxx", InstName.Ushr_V, InstEmit.Ushr_V, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_V, InstEmit.Ushr_V, OpCodeSimdShImm.Create);
+ SetA64("01111110xx100000001110xxxxxxxxxx", InstName.Usqadd_S, InstEmit.Usqadd_S, OpCodeSimd.Create);
+ SetA64("0>101110<<100000001110xxxxxxxxxx", InstName.Usqadd_V, InstEmit.Usqadd_V, OpCodeSimd.Create);
+ SetA64("0111111101xxxxxx000101xxxxxxxxxx", InstName.Usra_S, InstEmit.Usra_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx000101xxxxxxxxxx", InstName.Usra_V, InstEmit.Usra_V, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx000101xxxxxxxxxx", InstName.Usra_V, InstEmit.Usra_V, OpCodeSimdShImm.Create);
+ SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", InstName.Usubl_V, InstEmit.Usubl_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", InstName.Usubw_V, InstEmit.Usubw_V, OpCodeSimdReg.Create);
+ SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", InstName.Uzp1_V, InstEmit.Uzp1_V, OpCodeSimdReg.Create);
+ SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", InstName.Uzp2_V, InstEmit.Uzp2_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<100001001010xxxxxxxxxx", InstName.Xtn_V, InstEmit.Xtn_V, OpCodeSimd.Create);
+ SetA64("0>001110<<0xxxxx001110xxxxxxxxxx", InstName.Zip1_V, InstEmit.Zip1_V, OpCodeSimdReg.Create);
+ SetA64("0>001110<<0xxxxx011110xxxxxxxxxx", InstName.Zip2_V, InstEmit.Zip2_V, OpCodeSimdReg.Create);
+#endregion
+
+#region "OpCode Table (AArch32, A32)"
+ // Base
+ SetA32("<<<<0010101xxxxxxxxxxxxxxxxxxxxx", InstName.Adc, InstEmit32.Adc, OpCode32AluImm.Create);
+ SetA32("<<<<0000101xxxxxxxxxxxxxxxx0xxxx", InstName.Adc, InstEmit32.Adc, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000101xxxxxxxxxxxxx0xx1xxxx", InstName.Adc, InstEmit32.Adc, OpCode32AluRsReg.Create);
+ SetA32("<<<<0010100xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit32.Add, OpCode32AluImm.Create);
+ SetA32("<<<<0000100xxxxxxxxxxxxxxxx0xxxx", InstName.Add, InstEmit32.Add, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000100xxxxxxxxxxxxx0xx1xxxx", InstName.Add, InstEmit32.Add, OpCode32AluRsReg.Create);
+ SetA32("<<<<0010000xxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit32.And, OpCode32AluImm.Create);
+ SetA32("<<<<0000000xxxxxxxxxxxxxxxx0xxxx", InstName.And, InstEmit32.And, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000000xxxxxxxxxxxxx0xx1xxxx", InstName.And, InstEmit32.And, OpCode32AluRsReg.Create);
+ SetA32("<<<<1010xxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit32.B, OpCode32BImm.Create);
+ SetA32("<<<<0111110xxxxxxxxxxxxxx0011111", InstName.Bfc, InstEmit32.Bfc, OpCode32AluBf.Create);
+ SetA32("<<<<0111110xxxxxxxxxxxxxx001xxxx", InstName.Bfi, InstEmit32.Bfi, OpCode32AluBf.Create);
+ SetA32("<<<<0011110xxxxxxxxxxxxxxxxxxxxx", InstName.Bic, InstEmit32.Bic, OpCode32AluImm.Create);
+ SetA32("<<<<0001110xxxxxxxxxxxxxxxx0xxxx", InstName.Bic, InstEmit32.Bic, OpCode32AluRsImm.Create);
+ SetA32("<<<<0001110xxxxxxxxxxxxx0xx1xxxx", InstName.Bic, InstEmit32.Bic, OpCode32AluRsReg.Create);
+ SetA32("<<<<1011xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit32.Bl, OpCode32BImm.Create);
+ SetA32("1111101xxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Blx, InstEmit32.Blx, OpCode32BImm.Create);
+ SetA32("<<<<000100101111111111110011xxxx", InstName.Blx, InstEmit32.Blxr, OpCode32BReg.Create);
+ SetA32("<<<<000100101111111111110001xxxx", InstName.Bx, InstEmit32.Bx, OpCode32BReg.Create);
+ SetA32("11110101011111111111000000011111", InstName.Clrex, InstEmit32.Clrex, OpCode32.Create);
+ SetA32("<<<<000101101111xxxx11110001xxxx", InstName.Clz, InstEmit32.Clz, OpCode32AluReg.Create);
+ SetA32("<<<<00110111xxxx0000xxxxxxxxxxxx", InstName.Cmn, InstEmit32.Cmn, OpCode32AluImm.Create);
+ SetA32("<<<<00010111xxxx0000xxxxxxx0xxxx", InstName.Cmn, InstEmit32.Cmn, OpCode32AluRsImm.Create);
+ SetA32("<<<<00010111xxxx0000xxxx0xx1xxxx", InstName.Cmn, InstEmit32.Cmn, OpCode32AluRsReg.Create);
+ SetA32("<<<<00110101xxxx0000xxxxxxxxxxxx", InstName.Cmp, InstEmit32.Cmp, OpCode32AluImm.Create);
+ SetA32("<<<<00010101xxxx0000xxxxxxx0xxxx", InstName.Cmp, InstEmit32.Cmp, OpCode32AluRsImm.Create);
+ SetA32("<<<<00010101xxxx0000xxxx0xx1xxxx", InstName.Cmp, InstEmit32.Cmp, OpCode32AluRsReg.Create);
+ SetA32("<<<<00010000xxxxxxxx00000100xxxx", InstName.Crc32b, InstEmit32.Crc32b, OpCode32AluReg.Create);
+ SetA32("<<<<00010000xxxxxxxx00100100xxxx", InstName.Crc32cb, InstEmit32.Crc32cb, OpCode32AluReg.Create);
+ SetA32("<<<<00010010xxxxxxxx00100100xxxx", InstName.Crc32ch, InstEmit32.Crc32ch, OpCode32AluReg.Create);
+ SetA32("<<<<00010100xxxxxxxx00100100xxxx", InstName.Crc32cw, InstEmit32.Crc32cw, OpCode32AluReg.Create);
+ SetA32("<<<<00010010xxxxxxxx00000100xxxx", InstName.Crc32h, InstEmit32.Crc32h, OpCode32AluReg.Create);
+ SetA32("<<<<00010100xxxxxxxx00000100xxxx", InstName.Crc32w, InstEmit32.Crc32w, OpCode32AluReg.Create);
+ SetA32("<<<<0011001000001111000000010100", InstName.Csdb, InstEmit32.Csdb, OpCode32.Create);
+ SetA32("1111010101111111111100000101xxxx", InstName.Dmb, InstEmit32.Dmb, OpCode32.Create);
+ SetA32("1111010101111111111100000100xxxx", InstName.Dsb, InstEmit32.Dsb, OpCode32.Create);
+ SetA32("<<<<0010001xxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit32.Eor, OpCode32AluImm.Create);
+ SetA32("<<<<0000001xxxxxxxxxxxxxxxx0xxxx", InstName.Eor, InstEmit32.Eor, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000001xxxxxxxxxxxxx0xx1xxxx", InstName.Eor, InstEmit32.Eor, OpCode32AluRsReg.Create);
+ SetA32("<<<<0011001000001111000000010000", InstName.Esb, InstEmit32.Nop, OpCode32.Create); // Error Synchronization Barrier (FEAT_RAS)
+ SetA32("<<<<001100100000111100000000011x", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<0011001000001111000000001xxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<0011001000001111000000010001", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<0011001000001111000000010011", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<0011001000001111000000010101", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<001100100000111100000001011x", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<0011001000001111000000011xxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<00110010000011110000001xxxxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<0011001000001111000001xxxxxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<001100100000111100001xxxxxxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("1111010101111111111100000110xxxx", InstName.Isb, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<00011001xxxxxxxx110010011111", InstName.Lda, InstEmit32.Lda, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011101xxxxxxxx110010011111", InstName.Ldab, InstEmit32.Ldab, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011001xxxxxxxx111010011111", InstName.Ldaex, InstEmit32.Ldaex, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011101xxxxxxxx111010011111", InstName.Ldaexb, InstEmit32.Ldaexb, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011011xxxxxxxx111010011111", InstName.Ldaexd, InstEmit32.Ldaexd, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011111xxxxxxxx111010011111", InstName.Ldaexh, InstEmit32.Ldaexh, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011111xxxxxxxx110010011111", InstName.Ldah, InstEmit32.Ldah, OpCode32MemLdEx.Create);
+ SetA32("<<<<100xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, OpCode32MemMult.Create);
+ SetA32("<<<<010xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCode32MemImm.Create);
+ SetA32("<<<<011xx0x1xxxxxxxxxxxxxxx0xxxx", InstName.Ldr, InstEmit32.Ldr, OpCode32MemRsImm.Create);
+ SetA32("<<<<010xx1x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCode32MemImm.Create);
+ SetA32("<<<<011xx1x1xxxxxxxxxxxxxxx0xxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCode32MemRsImm.Create);
+ SetA32("<<<<000xx1x0xxxxxxxxxxxx1101xxxx", InstName.Ldrd, InstEmit32.Ldrd, OpCode32MemImm8.Create);
+ SetA32("<<<<000xx0x0xxxxxxxx00001101xxxx", InstName.Ldrd, InstEmit32.Ldrd, OpCode32MemReg.Create);
+ SetA32("<<<<00011001xxxxxxxx111110011111", InstName.Ldrex, InstEmit32.Ldrex, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011101xxxxxxxx111110011111", InstName.Ldrexb, InstEmit32.Ldrexb, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011011xxxxxxxx111110011111", InstName.Ldrexd, InstEmit32.Ldrexd, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011111xxxxxxxx111110011111", InstName.Ldrexh, InstEmit32.Ldrexh, OpCode32MemLdEx.Create);
+ SetA32("<<<<000xx1x1xxxxxxxxxxxx1011xxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCode32MemImm8.Create);
+ SetA32("<<<<000xx0x1xxxxxxxx00001011xxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCode32MemReg.Create);
+ SetA32("<<<<000xx1x1xxxxxxxxxxxx1101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCode32MemImm8.Create);
+ SetA32("<<<<000xx0x1xxxxxxxx00001101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCode32MemReg.Create);
+ SetA32("<<<<000xx1x1xxxxxxxxxxxx1111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCode32MemImm8.Create);
+ SetA32("<<<<000xx0x1xxxxxxxx00001111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCode32MemReg.Create);
+ SetA32("<<<<1110xxx0xxxxxxxx111xxxx1xxxx", InstName.Mcr, InstEmit32.Mcr, OpCode32System.Create);
+ SetA32("<<<<0000001xxxxxxxxxxxxx1001xxxx", InstName.Mla, InstEmit32.Mla, OpCode32AluMla.Create);
+ SetA32("<<<<00000110xxxxxxxxxxxx1001xxxx", InstName.Mls, InstEmit32.Mls, OpCode32AluMla.Create);
+ SetA32("<<<<0011101x0000xxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCode32AluImm.Create);
+ SetA32("<<<<0001101x0000xxxxxxxxxxx0xxxx", InstName.Mov, InstEmit32.Mov, OpCode32AluRsImm.Create);
+ SetA32("<<<<0001101x0000xxxxxxxx0xx1xxxx", InstName.Mov, InstEmit32.Mov, OpCode32AluRsReg.Create);
+ SetA32("<<<<00110000xxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCode32AluImm16.Create);
+ SetA32("<<<<00110100xxxxxxxxxxxxxxxxxxxx", InstName.Movt, InstEmit32.Movt, OpCode32AluImm16.Create);
+ SetA32("<<<<1110xxx1xxxxxxxx111xxxx1xxxx", InstName.Mrc, InstEmit32.Mrc, OpCode32System.Create);
+ SetA32("<<<<11000101xxxxxxxx111xxxxxxxxx", InstName.Mrrc, InstEmit32.Mrrc, OpCode32System.Create);
+ SetA32("<<<<00010x001111xxxx000000000000", InstName.Mrs, InstEmit32.Mrs, OpCode32Mrs.Create);
+ SetA32("<<<<00010x10xxxx111100000000xxxx", InstName.Msr, InstEmit32.Msr, OpCode32MsrReg.Create);
+ SetA32("<<<<0000000xxxxx0000xxxx1001xxxx", InstName.Mul, InstEmit32.Mul, OpCode32AluMla.Create);
+ SetA32("<<<<0011111x0000xxxxxxxxxxxxxxxx", InstName.Mvn, InstEmit32.Mvn, OpCode32AluImm.Create);
+ SetA32("<<<<0001111x0000xxxxxxxxxxx0xxxx", InstName.Mvn, InstEmit32.Mvn, OpCode32AluRsImm.Create);
+ SetA32("<<<<0001111x0000xxxxxxxx0xx1xxxx", InstName.Mvn, InstEmit32.Mvn, OpCode32AluRsReg.Create);
+ SetA32("<<<<0011001000001111000000000000", InstName.Nop, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<0011100xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit32.Orr, OpCode32AluImm.Create);
+ SetA32("<<<<0001100xxxxxxxxxxxxxxxx0xxxx", InstName.Orr, InstEmit32.Orr, OpCode32AluRsImm.Create);
+ SetA32("<<<<0001100xxxxxxxxxxxxx0xx1xxxx", InstName.Orr, InstEmit32.Orr, OpCode32AluRsReg.Create);
+ SetA32("<<<<01101000xxxxxxxxxxxxxx01xxxx", InstName.Pkh, InstEmit32.Pkh, OpCode32AluRsImm.Create);
+ SetA32("11110101xx01xxxx1111xxxxxxxxxxxx", InstName.Pld, InstEmit32.Nop, OpCode32.Create);
+ SetA32("11110111xx01xxxx1111xxxxxxx0xxxx", InstName.Pld, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<011011111111xxxx11110011xxxx", InstName.Rbit, InstEmit32.Rbit, OpCode32AluReg.Create);
+ SetA32("<<<<011010111111xxxx11110011xxxx", InstName.Rev, InstEmit32.Rev, OpCode32AluReg.Create);
+ SetA32("<<<<011010111111xxxx11111011xxxx", InstName.Rev16, InstEmit32.Rev16, OpCode32AluReg.Create);
+ SetA32("<<<<011011111111xxxx11111011xxxx", InstName.Revsh, InstEmit32.Revsh, OpCode32AluReg.Create);
+ SetA32("<<<<0010011xxxxxxxxxxxxxxxxxxxxx", InstName.Rsb, InstEmit32.Rsb, OpCode32AluImm.Create);
+ SetA32("<<<<0000011xxxxxxxxxxxxxxxx0xxxx", InstName.Rsb, InstEmit32.Rsb, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000011xxxxxxxxxxxxx0xx1xxxx", InstName.Rsb, InstEmit32.Rsb, OpCode32AluRsReg.Create);
+ SetA32("<<<<0010111xxxxxxxxxxxxxxxxxxxxx", InstName.Rsc, InstEmit32.Rsc, OpCode32AluImm.Create);
+ SetA32("<<<<0000111xxxxxxxxxxxxxxxx0xxxx", InstName.Rsc, InstEmit32.Rsc, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000111xxxxxxxxxxxxx0xx1xxxx", InstName.Rsc, InstEmit32.Rsc, OpCode32AluRsReg.Create);
+ SetA32("<<<<01100001xxxxxxxx11111001xxxx", InstName.Sadd8, InstEmit32.Sadd8, OpCode32AluReg.Create);
+ SetA32("<<<<0010110xxxxxxxxxxxxxxxxxxxxx", InstName.Sbc, InstEmit32.Sbc, OpCode32AluImm.Create);
+ SetA32("<<<<0000110xxxxxxxxxxxxxxxx0xxxx", InstName.Sbc, InstEmit32.Sbc, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000110xxxxxxxxxxxxx0xx1xxxx", InstName.Sbc, InstEmit32.Sbc, OpCode32AluRsReg.Create);
+ SetA32("<<<<0111101xxxxxxxxxxxxxx101xxxx", InstName.Sbfx, InstEmit32.Sbfx, OpCode32AluBf.Create);
+ SetA32("<<<<01110001xxxx1111xxxx0001xxxx", InstName.Sdiv, InstEmit32.Sdiv, OpCode32AluMla.Create);
+ SetA32("<<<<01101000xxxxxxxx11111011xxxx", InstName.Sel, InstEmit32.Sel, OpCode32AluReg.Create);
+ SetA32("<<<<0011001000001111000000000100", InstName.Sev, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<0011001000001111000000000101", InstName.Sevl, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<01100011xxxxxxxx11111001xxxx", InstName.Shadd8, InstEmit32.Shadd8, OpCode32AluReg.Create);
+ SetA32("<<<<01100011xxxxxxxx11111111xxxx", InstName.Shsub8, InstEmit32.Shsub8, OpCode32AluReg.Create);
+ SetA32("<<<<00010000xxxxxxxxxxxx1xx0xxxx", InstName.Smla__, InstEmit32.Smla__, OpCode32AluMla.Create);
+ SetA32("<<<<0000111xxxxxxxxxxxxx1001xxxx", InstName.Smlal, InstEmit32.Smlal, OpCode32AluUmull.Create);
+ SetA32("<<<<00010100xxxxxxxxxxxx1xx0xxxx", InstName.Smlal__, InstEmit32.Smlal__, OpCode32AluUmull.Create);
+ SetA32("<<<<00010010xxxxxxxxxxxx1x00xxxx", InstName.Smlaw_, InstEmit32.Smlaw_, OpCode32AluMla.Create);
+ SetA32("<<<<01110101xxxxxxxxxxxx00x1xxxx", InstName.Smmla, InstEmit32.Smmla, OpCode32AluMla.Create);
+ SetA32("<<<<01110101xxxxxxxxxxxx11x1xxxx", InstName.Smmls, InstEmit32.Smmls, OpCode32AluMla.Create);
+ SetA32("<<<<00010110xxxxxxxxxxxx1xx0xxxx", InstName.Smul__, InstEmit32.Smul__, OpCode32AluMla.Create);
+ SetA32("<<<<0000110xxxxxxxxxxxxx1001xxxx", InstName.Smull, InstEmit32.Smull, OpCode32AluUmull.Create);
+ SetA32("<<<<00010010xxxx0000xxxx1x10xxxx", InstName.Smulw_, InstEmit32.Smulw_, OpCode32AluMla.Create);
+ SetA32("<<<<0110101xxxxxxxxxxxxxxx01xxxx", InstName.Ssat, InstEmit32.Ssat, OpCode32Sat.Create);
+ SetA32("<<<<01101010xxxxxxxx11110011xxxx", InstName.Ssat16, InstEmit32.Ssat16, OpCode32Sat16.Create);
+ SetA32("<<<<01100001xxxxxxxx11111111xxxx", InstName.Ssub8, InstEmit32.Ssub8, OpCode32AluReg.Create);
+ SetA32("<<<<00011000xxxx111111001001xxxx", InstName.Stl, InstEmit32.Stl, OpCode32MemStEx.Create);
+ SetA32("<<<<00011100xxxx111111001001xxxx", InstName.Stlb, InstEmit32.Stlb, OpCode32MemStEx.Create);
+ SetA32("<<<<00011000xxxxxxxx11101001xxxx", InstName.Stlex, InstEmit32.Stlex, OpCode32MemStEx.Create);
+ SetA32("<<<<00011100xxxxxxxx11101001xxxx", InstName.Stlexb, InstEmit32.Stlexb, OpCode32MemStEx.Create);
+ SetA32("<<<<00011010xxxxxxxx11101001xxxx", InstName.Stlexd, InstEmit32.Stlexd, OpCode32MemStEx.Create);
+ SetA32("<<<<00011110xxxxxxxx11101001xxxx", InstName.Stlexh, InstEmit32.Stlexh, OpCode32MemStEx.Create);
+ SetA32("<<<<00011110xxxx111111001001xxxx", InstName.Stlh, InstEmit32.Stlh, OpCode32MemStEx.Create);
+ SetA32("<<<<100xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, OpCode32MemMult.Create);
+ SetA32("<<<<010xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit32.Str, OpCode32MemImm.Create);
+ SetA32("<<<<011xx0x0xxxxxxxxxxxxxxx0xxxx", InstName.Str, InstEmit32.Str, OpCode32MemRsImm.Create);
+ SetA32("<<<<010xx1x0xxxxxxxxxxxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCode32MemImm.Create);
+ SetA32("<<<<011xx1x0xxxxxxxxxxxxxxx0xxxx", InstName.Strb, InstEmit32.Strb, OpCode32MemRsImm.Create);
+ SetA32("<<<<000xx1x0xxxxxxxxxxxx1111xxxx", InstName.Strd, InstEmit32.Strd, OpCode32MemImm8.Create);
+ SetA32("<<<<000xx0x0xxxxxxxx00001111xxxx", InstName.Strd, InstEmit32.Strd, OpCode32MemReg.Create);
+ SetA32("<<<<00011000xxxxxxxx11111001xxxx", InstName.Strex, InstEmit32.Strex, OpCode32MemStEx.Create);
+ SetA32("<<<<00011100xxxxxxxx11111001xxxx", InstName.Strexb, InstEmit32.Strexb, OpCode32MemStEx.Create);
+ SetA32("<<<<00011010xxxxxxxx11111001xxxx", InstName.Strexd, InstEmit32.Strexd, OpCode32MemStEx.Create);
+ SetA32("<<<<00011110xxxxxxxx11111001xxxx", InstName.Strexh, InstEmit32.Strexh, OpCode32MemStEx.Create);
+ SetA32("<<<<000xx1x0xxxxxxxxxxxx1011xxxx", InstName.Strh, InstEmit32.Strh, OpCode32MemImm8.Create);
+ SetA32("<<<<000xx0x0xxxxxxxx00001011xxxx", InstName.Strh, InstEmit32.Strh, OpCode32MemReg.Create);
+ SetA32("<<<<0010010xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCode32AluImm.Create);
+ SetA32("<<<<0000010xxxxxxxxxxxxxxxx0xxxx", InstName.Sub, InstEmit32.Sub, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000010xxxxxxxxxxxxx0xx1xxxx", InstName.Sub, InstEmit32.Sub, OpCode32AluRsReg.Create);
+ SetA32("<<<<1111xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Svc, InstEmit32.Svc, OpCode32Exception.Create);
+ SetA32("<<<<01101010xxxxxxxxxx000111xxxx", InstName.Sxtb, InstEmit32.Sxtb, OpCode32AluUx.Create);
+ SetA32("<<<<01101000xxxxxxxxxx000111xxxx", InstName.Sxtb16, InstEmit32.Sxtb16, OpCode32AluUx.Create);
+ SetA32("<<<<01101011xxxxxxxxxx000111xxxx", InstName.Sxth, InstEmit32.Sxth, OpCode32AluUx.Create);
+ SetA32("<<<<00110011xxxx0000xxxxxxxxxxxx", InstName.Teq, InstEmit32.Teq, OpCode32AluImm.Create);
+ SetA32("<<<<00010011xxxx0000xxxxxxx0xxxx", InstName.Teq, InstEmit32.Teq, OpCode32AluRsImm.Create);
+ SetA32("<<<<00010011xxxx0000xxxx0xx1xxxx", InstName.Teq, InstEmit32.Teq, OpCode32AluRsReg.Create);
+ SetA32("<<<<0111111111111101111011111110", InstName.Trap, InstEmit32.Trap, OpCode32Exception.Create);
+ SetA32("<<<<0011001000001111000000010010", InstName.Tsb, InstEmit32.Nop, OpCode32.Create); // Trace Synchronization Barrier (FEAT_TRF)
+ SetA32("<<<<00110001xxxx0000xxxxxxxxxxxx", InstName.Tst, InstEmit32.Tst, OpCode32AluImm.Create);
+ SetA32("<<<<00010001xxxx0000xxxxxxx0xxxx", InstName.Tst, InstEmit32.Tst, OpCode32AluRsImm.Create);
+ SetA32("<<<<00010001xxxx0000xxxx0xx1xxxx", InstName.Tst, InstEmit32.Tst, OpCode32AluRsReg.Create);
+ SetA32("<<<<01100101xxxxxxxx11111001xxxx", InstName.Uadd8, InstEmit32.Uadd8, OpCode32AluReg.Create);
+ SetA32("<<<<0111111xxxxxxxxxxxxxx101xxxx", InstName.Ubfx, InstEmit32.Ubfx, OpCode32AluBf.Create);
+ SetA32("<<<<01110011xxxx1111xxxx0001xxxx", InstName.Udiv, InstEmit32.Udiv, OpCode32AluMla.Create);
+ SetA32("<<<<01100111xxxxxxxx11111001xxxx", InstName.Uhadd8, InstEmit32.Uhadd8, OpCode32AluReg.Create);
+ SetA32("<<<<01100111xxxxxxxx11111111xxxx", InstName.Uhsub8, InstEmit32.Uhsub8, OpCode32AluReg.Create);
+ SetA32("<<<<00000100xxxxxxxxxxxx1001xxxx", InstName.Umaal, InstEmit32.Umaal, OpCode32AluUmull.Create);
+ SetA32("<<<<0000101xxxxxxxxxxxxx1001xxxx", InstName.Umlal, InstEmit32.Umlal, OpCode32AluUmull.Create);
+ SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull, InstEmit32.Umull, OpCode32AluUmull.Create);
+ SetA32("<<<<0110111xxxxxxxxxxxxxxx01xxxx", InstName.Usat, InstEmit32.Usat, OpCode32Sat.Create);
+ SetA32("<<<<01101110xxxxxxxx11110011xxxx", InstName.Usat16, InstEmit32.Usat16, OpCode32Sat16.Create);
+ SetA32("<<<<01100101xxxxxxxx11111111xxxx", InstName.Usub8, InstEmit32.Usub8, OpCode32AluReg.Create);
+ SetA32("<<<<01101110xxxxxxxxxx000111xxxx", InstName.Uxtb, InstEmit32.Uxtb, OpCode32AluUx.Create);
+ SetA32("<<<<01101100xxxxxxxxxx000111xxxx", InstName.Uxtb16, InstEmit32.Uxtb16, OpCode32AluUx.Create);
+ SetA32("<<<<01101111xxxxxxxxxx000111xxxx", InstName.Uxth, InstEmit32.Uxth, OpCode32AluUx.Create);
+ SetA32("<<<<0011001000001111000000000010", InstName.Wfe, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<0011001000001111000000000011", InstName.Wfi, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<0011001000001111000000000001", InstName.Yield, InstEmit32.Nop, OpCode32.Create);
+
+ // VFP
+ SetVfp("<<<<11101x110000xxxx101x11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11101x11010xxxxx101x01x0xxxx", InstName.Vcmp, InstEmit32.Vcmp, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101x11010xxxxx101x11x0xxxx", InstName.Vcmpe, InstEmit32.Vcmpe, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101x110111xxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FD, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); // FP 32 and 64, scalar.
+ SetVfp("<<<<11101x11110xxxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, OpCode32SimdCvtFI.Create, OpCode32SimdCvtFI.CreateT32); // FP32 to int.
+ SetVfp("<<<<11101x111000xxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, OpCode32SimdCvtFI.Create, OpCode32SimdCvtFI.CreateT32); // Int to FP32.
+ SetVfp("111111101x1111xxxxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_RM, OpCode32SimdCvtFI.Create, OpCode32SimdCvtFI.CreateT32); // The many FP32 to int encodings (fp).
+ SetVfp("<<<<11101x11001xxxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_TB, OpCode32SimdCvtTB.Create, OpCode32SimdCvtTB.CreateT32);
+ SetVfp("<<<<11101x00xxxxxxxx101xx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup, InstEmit32.Vdup, OpCode32SimdDupGP.Create, OpCode32SimdDupGP.CreateT32);
+ SetVfp("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11101x01xxxxxxxx101xx1x0xxxx", InstName.Vfnma, InstEmit32.Vfnma_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11001x01xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11001x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11010x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11001x01xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11001x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11010x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<1101xx01xxxxxxxx101xxxxxxxxx", InstName.Vldr, InstEmit32.Vldr, OpCode32SimdMemImm.Create, OpCode32SimdMemImm.CreateT32);
+ SetVfp("111111101x00xxxxxxxx10>>x0x0xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("111111101x00xxxxxxxx10>>x1x0xxxx", InstName.Vminnm, InstEmit32.Vminnm_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11100x00xxxxxxxx101xx0x0xxxx", InstName.Vmla, InstEmit32.Vmla_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11100x00xxxxxxxx101xx1x0xxxx", InstName.Vmls, InstEmit32.Vmls_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11100xx0xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, OpCode32SimdMovGpElem.Create, OpCode32SimdMovGpElem.CreateT32); // From gen purpose.
+ SetVfp("<<<<1110xxx1xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, OpCode32SimdMovGpElem.Create, OpCode32SimdMovGpElem.CreateT32); // To gen purpose.
+ SetVfp("<<<<1100010xxxxxxxxx101000x1xxxx", InstName.Vmov, InstEmit32.Vmov_G2, OpCode32SimdMovGpDouble.Create, OpCode32SimdMovGpDouble.CreateT32); // To/from gen purpose x2 and single precision x2.
+ SetVfp("<<<<1100010xxxxxxxxx101100x1xxxx", InstName.Vmov, InstEmit32.Vmov_GD, OpCode32SimdMovGpDouble.Create, OpCode32SimdMovGpDouble.CreateT32); // To/from gen purpose x2 and double precision.
+ SetVfp("<<<<1110000xxxxxxxxx1010x0010000", InstName.Vmov, InstEmit32.Vmov_GS, OpCode32SimdMovGp.Create, OpCode32SimdMovGp.CreateT32); // To/from gen purpose and single precision.
+ SetVfp("<<<<11101x11xxxxxxxx101x0000xxxx", InstName.Vmov, InstEmit32.Vmov_I, OpCode32SimdImm44.Create, OpCode32SimdImm44.CreateT32); // Scalar f16/32/64 based on size 01 10 11.
+ SetVfp("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov, InstEmit32.Vmov_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, OpCode32SimdSpecial.Create, OpCode32SimdSpecial.CreateT32);
+ SetVfp("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, OpCode32SimdSpecial.Create, OpCode32SimdSpecial.CreateT32);
+ SetVfp("<<<<11100x10xxxxxxxx101xx0x0xxxx", InstName.Vmul, InstEmit32.Vmul_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11101x110001xxxx101x01x0xxxx", InstName.Vneg, InstEmit32.Vneg_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11100x01xxxxxxxx101xx1x0xxxx", InstName.Vnmla, InstEmit32.Vnmla_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls, InstEmit32.Vnmls_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul, InstEmit32.Vnmul_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint, InstEmit32.Vrint_RM, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101x110111xxxx101x01x0xxxx", InstName.Vrintx, InstEmit32.Vrintx_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101x110001xxxx101x11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("111111100xxxxxxxxxxx101xx0x0xxxx", InstName.Vsel, InstEmit32.Vsel, OpCode32SimdSel.Create, OpCode32SimdSel.CreateT32);
+ SetVfp("<<<<11001x00xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11001x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11010x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11001x00xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11001x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11010x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<1101xx00xxxxxxxx101xxxxxxxxx", InstName.Vstr, InstEmit32.Vstr, OpCode32SimdMemImm.Create, OpCode32SimdMemImm.CreateT32);
+ SetVfp("<<<<11100x11xxxxxxxx101xx1x0xxxx", InstName.Vsub, InstEmit32.Vsub_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+
+ // ASIMD
+ SetAsimd("111100111x110000xxx0001101x0xxx0", InstName.Aesd_V, InstEmit32.Aesd_V, OpCode32Simd.Create, OpCode32Simd.CreateT32);
+ SetAsimd("111100111x110000xxx0001100x0xxx0", InstName.Aese_V, InstEmit32.Aese_V, OpCode32Simd.Create, OpCode32Simd.CreateT32);
+ SetAsimd("111100111x110000xxx0001111x0xxx0", InstName.Aesimc_V, InstEmit32.Aesimc_V, OpCode32Simd.Create, OpCode32Simd.CreateT32);
+ SetAsimd("111100111x110000xxx0001110x0xxx0", InstName.Aesmc_V, InstEmit32.Aesmc_V, OpCode32Simd.Create, OpCode32Simd.CreateT32);
+ SetAsimd("111100110x00xxx0xxx01100x1x0xxx0", InstName.Sha256h_V, InstEmit32.Sha256h_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100110x01xxx0xxx01100x1x0xxx0", InstName.Sha256h2_V, InstEmit32.Sha256h2_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x111010xxx0001111x0xxx0", InstName.Sha256su0_V, InstEmit32.Sha256su0_V, OpCode32Simd.Create, OpCode32Simd.CreateT32);
+ SetAsimd("111100110x10xxx0xxx01100x1x0xxx0", InstName.Sha256su1_V, InstEmit32.Sha256su1_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x0x<<xxxxxxxx0111xxx0xxxx", InstName.Vabd, InstEmit32.Vabd_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x1x<<xxxxxxxx0111x0x0xxxx", InstName.Vabdl, InstEmit32.Vabdl_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32);
+ SetAsimd("111100111x11<<01xxxx00110xx0xxxx", InstName.Vabs, InstEmit32.Vabs_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100111x111001xxxx01110xx0xxxx", InstName.Vabs, InstEmit32.Vabs_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100100xxxxxxxxxxx1000xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100100x00xxxxxxxx1101xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x1x<<xxxxxxx00000x0x0xxxx", InstName.Vaddl, InstEmit32.Vaddl_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32);
+ SetAsimd("1111001x1x<<xxxxxxx00001x0x0xxxx", InstName.Vaddw, InstEmit32.Vaddw_I, OpCode32SimdRegWide.Create, OpCode32SimdRegWide.CreateT32);
+ SetAsimd("111100100x00xxxxxxxx0001xxx1xxxx", InstName.Vand, InstEmit32.Vand_I, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32);
+ SetAsimd("111100100x01xxxxxxxx0001xxx1xxxx", InstName.Vbic, InstEmit32.Vbic_I, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32);
+ SetAsimd("1111001x1x000xxxxxxx<<x10x11xxxx", InstName.Vbic, InstEmit32.Vbic_II, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32);
+ SetAsimd("111100110x11xxxxxxxx0001xxx1xxxx", InstName.Vbif, InstEmit32.Vbif, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32);
+ SetAsimd("111100110x10xxxxxxxx0001xxx1xxxx", InstName.Vbit, InstEmit32.Vbit, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32);
+ SetAsimd("111100110x01xxxxxxxx0001xxx1xxxx", InstName.Vbsl, InstEmit32.Vbsl, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32);
+ SetAsimd("111100110x<<xxxxxxxx1000xxx1xxxx", InstName.Vceq, InstEmit32.Vceq_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100100x00xxxxxxxx1110xxx0xxxx", InstName.Vceq, InstEmit32.Vceq_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x11xx01xxxx0x010xx0xxxx", InstName.Vceq, InstEmit32.Vceq_Z, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("1111001x0x<<xxxxxxxx0011xxx1xxxx", InstName.Vcge, InstEmit32.Vcge_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100110x00xxxxxxxx1110xxx0xxxx", InstName.Vcge, InstEmit32.Vcge_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x11xx01xxxx0x001xx0xxxx", InstName.Vcge, InstEmit32.Vcge_Z, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("1111001x0x<<xxxxxxxx0011xxx0xxxx", InstName.Vcgt, InstEmit32.Vcgt_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100110x10xxxxxxxx1110xxx0xxxx", InstName.Vcgt, InstEmit32.Vcgt_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x11xx01xxxx0x000xx0xxxx", InstName.Vcgt, InstEmit32.Vcgt_Z, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100111x11xx01xxxx0x011xx0xxxx", InstName.Vcle, InstEmit32.Vcle_Z, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100111x11xx01xxxx0x100xx0xxxx", InstName.Vclt, InstEmit32.Vclt_Z, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100111x110000xxxx01010xx0xxxx", InstName.Vcnt, InstEmit32.Vcnt, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100111x111011xxxx011xxxx0xxxx", InstName.Vcvt, InstEmit32.Vcvt_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); // FP and integer, vector.
+ SetAsimd("111100111x11xxxxxxxx11000xx0xxxx", InstName.Vdup, InstEmit32.Vdup_1, OpCode32SimdDupElem.Create, OpCode32SimdDupElem.CreateT32);
+ SetAsimd("111100110x00xxxxxxxx0001xxx1xxxx", InstName.Veor, InstEmit32.Veor_I, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32);
+ SetAsimd("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, OpCode32SimdExt.Create, OpCode32SimdExt.CreateT32);
+ SetAsimd("111100100x00xxxxxxxx1100xxx1xxxx", InstName.Vfma, InstEmit32.Vfma_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100100x10xxxxxxxx1100xxx1xxxx", InstName.Vfms, InstEmit32.Vfms_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x0x<<xxxxxxxx0000xxx0xxxx", InstName.Vhadd, InstEmit32.Vhadd, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx0000xxx0xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx0100xx0xxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx1000x000xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx1000x011xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx110000x0xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx110001xxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx110010xxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101000x10xxxxxxxx0111xx0xxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1.
+ SetAsimd("111101000x10xxxxxxxx1010xx<<xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 2.
+ SetAsimd("111101000x10xxxxxxxx0110xx0xxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 3.
+ SetAsimd("111101000x10xxxxxxxx0010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 4.
+ SetAsimd("111101001x10xxxxxxxx0x01xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx1001xx0xxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx1101<<xxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101000x10xxxxxxxx100x<<0xxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1, inc = 1/2 (itype).
+ SetAsimd("111101000x10xxxxxxxx100x<<10xxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1, inc = 1/2 (itype).
+ SetAsimd("111101000x10xxxxxxxx0011<<xxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 2, inc = 2.
+ SetAsimd("111101001x10xxxxxxxx0x10xxx0xxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx1010xx00xxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx1110<<x0xxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101000x10xxxxxxxx010x<<0xxxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Inc = 1/2 (itype).
+ SetAsimd("111101001x10xxxxxxxx0x11xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx1011xx<<xxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x10xxxxxxxx1111<<x>xxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101000x10xxxxxxxx000x<<xxxxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Inc = 1/2 (itype).
+ SetAsimd("1111001x0x<<xxxxxxxx0110xxx0xxxx", InstName.Vmax, InstEmit32.Vmax_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100100x00xxxxxxxx1111xxx0xxxx", InstName.Vmax, InstEmit32.Vmax_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x0x<<xxxxxxxx0110xxx1xxxx", InstName.Vmin, InstEmit32.Vmin_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100100x10xxxxxxxx1111xxx0xxxx", InstName.Vmin, InstEmit32.Vmin_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100110x0xxxxxxxxx1111xxx1xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100110x1xxxxxxxxx1111xxx1xxxx", InstName.Vminnm, InstEmit32.Vminnm_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x1x<<xxxxxxxx000xx1x0xxxx", InstName.Vmla, InstEmit32.Vmla_1, OpCode32SimdRegElem.Create, OpCode32SimdRegElem.CreateT32);
+ SetAsimd("111100100xxxxxxxxxxx1001xxx0xxxx", InstName.Vmla, InstEmit32.Vmla_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100100x00xxxxxxxx1101xxx1xxxx", InstName.Vmla, InstEmit32.Vmla_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x1x<<xxxxxxx01000x0x0xxxx", InstName.Vmlal, InstEmit32.Vmlal_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32);
+ SetAsimd("1111001x1x<<xxxxxxxx010xx1x0xxxx", InstName.Vmls, InstEmit32.Vmls_1, OpCode32SimdRegElem.Create, OpCode32SimdRegElem.CreateT32);
+ SetAsimd("111100100x10xxxxxxxx1101xxx1xxxx", InstName.Vmls, InstEmit32.Vmls_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100110xxxxxxxxxxx1001xxx0xxxx", InstName.Vmls, InstEmit32.Vmls_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x1x<<xxxxxxx01010x0x0xxxx", InstName.Vmlsl, InstEmit32.Vmlsl_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32);
+ SetAsimd("1111001x1x000xxxxxxx0xx00x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); // D/Q vector I32.
+ SetAsimd("1111001x1x000xxxxxxx10x00x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); // D/Q I16.
+ SetAsimd("1111001x1x000xxxxxxx11xx0x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); // D/Q (dt - from cmode).
+ SetAsimd("1111001x1x000xxxxxxx11100x11xxxx", InstName.Vmov, InstEmit32.Vmov_I, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); // D/Q I64.
+ SetAsimd("1111001x1x001000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, OpCode32SimdLong.Create, OpCode32SimdLong.CreateT32);
+ SetAsimd("1111001x1x010000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, OpCode32SimdLong.Create, OpCode32SimdLong.CreateT32);
+ SetAsimd("1111001x1x100000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, OpCode32SimdLong.Create, OpCode32SimdLong.CreateT32);
+ SetAsimd("111100111x11<<10xxxx001000x0xxx0", InstName.Vmovn, InstEmit32.Vmovn, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32);
+ SetAsimd("1111001x1x<<xxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, OpCode32SimdRegElem.Create, OpCode32SimdRegElem.CreateT32);
+ SetAsimd("111100100x<<xxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100110x00xxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100110x00xxxxxxxx1101xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x1x<<xxxxxxx01010x1x0xxxx", InstName.Vmull, InstEmit32.Vmull_1, OpCode32SimdRegElemLong.Create, OpCode32SimdRegElemLong.CreateT32);
+ SetAsimd("1111001x1x<<xxxxxxx01100x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32);
+ SetAsimd("111100101xx0xxxxxxx01110x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32); // P8/P64
+ SetAsimd("111100111x110000xxxx01011xx0xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32);
+ SetAsimd("1111001x1x000xxxxxxx0xx00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_II, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); // D/Q vector I32.
+ SetAsimd("1111001x1x000xxxxxxx10x00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_II, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32);
+ SetAsimd("1111001x1x000xxxxxxx110x0x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_II, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32);
+ SetAsimd("111100111x11<<01xxxx00111xx0xxxx", InstName.Vneg, InstEmit32.Vneg_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100111x111001xxxx01111xx0xxxx", InstName.Vneg, InstEmit32.Vneg_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100100x11xxxxxxxx0001xxx1xxxx", InstName.Vorn, InstEmit32.Vorn_I, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32);
+ SetAsimd("111100100x10xxxxxxxx0001xxx1xxxx", InstName.Vorr, InstEmit32.Vorr_I, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32);
+ SetAsimd("1111001x1x000xxxxxxx<<x10x01xxxx", InstName.Vorr, InstEmit32.Vorr_II, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32);
+ SetAsimd("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd, InstEmit32.Vpadd_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd, InstEmit32.Vpadd_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x11<<00xxxx0010xxx0xxxx", InstName.Vpaddl, InstEmit32.Vpaddl, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("1111001x0x<<xxxxxxxx1010x0x0xxxx", InstName.Vpmax, InstEmit32.Vpmax_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100110x00xxxxxxxx1111x0x0xxxx", InstName.Vpmax, InstEmit32.Vpmax_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x0x<<xxxxxxxx1010x0x1xxxx", InstName.Vpmin, InstEmit32.Vpmin_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100110x10xxxxxxxx1111x0x0xxxx", InstName.Vpmin, InstEmit32.Vpmin_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x0xxxxxxxxxxx0000xxx1xxxx", InstName.Vqadd, InstEmit32.Vqadd, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100100x01xxxxxxxx1011xxx0xxxx", InstName.Vqdmulh, InstEmit32.Vqdmulh, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100100x10xxxxxxxx1011xxx0xxxx", InstName.Vqdmulh, InstEmit32.Vqdmulh, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x11<<10xxxx00101xx0xxx0", InstName.Vqmovn, InstEmit32.Vqmovn, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32);
+ SetAsimd("111100111x11<<10xxxx001001x0xxx0", InstName.Vqmovun, InstEmit32.Vqmovun, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32);
+ SetAsimd("1111001x1x>>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
+ SetAsimd("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
+ SetAsimd("1111001x1x>>>xxxxxxx100100x1xxx0", InstName.Vqshrn, InstEmit32.Vqshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
+ SetAsimd("111100111x>>>xxxxxxx100000x1xxx0", InstName.Vqshrun, InstEmit32.Vqshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
+ SetAsimd("1111001x0xxxxxxxxxxx0010xxx1xxxx", InstName.Vqsub, InstEmit32.Vqsub, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, OpCode32SimdSqrte.Create, OpCode32SimdSqrte.CreateT32);
+ SetAsimd("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x11xx00xxxx000<<xx0xxxx", InstName.Vrev, InstEmit32.Vrev, OpCode32SimdRev.Create, OpCode32SimdRev.CreateT32);
+ SetAsimd("1111001x0x<<xxxxxxxx0001xxx0xxxx", InstName.Vrhadd, InstEmit32.Vrhadd, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x111010xxxx01010xx0xxxx", InstName.Vrinta, InstEmit32.Vrinta_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100111x111010xxxx01101xx0xxxx", InstName.Vrintm, InstEmit32.Vrintm_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100111x111010xxxx01000xx0xxxx", InstName.Vrintn, InstEmit32.Vrintn_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100111x111010xxxx01111xx0xxxx", InstName.Vrintp, InstEmit32.Vrintp_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("1111001x1x>>>xxxxxxx0010>xx1xxxx", InstName.Vrshr, InstEmit32.Vrshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
+ SetAsimd("111100101x>>>xxxxxxx100001x1xxx0", InstName.Vrshrn, InstEmit32.Vrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
+ SetAsimd("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, OpCode32SimdSqrte.Create, OpCode32SimdSqrte.CreateT32);
+ SetAsimd("111100100x10xxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x1x>>>xxxxxxx0011>xx1xxxx", InstName.Vrsra, InstEmit32.Vrsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
+ SetAsimd("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
+ SetAsimd("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x1x>>>xxxxxxx101000x1xxxx", InstName.Vshll, InstEmit32.Vshll, OpCode32SimdShImmLong.Create, OpCode32SimdShImmLong.CreateT32); // A1 encoding.
+ SetAsimd("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
+ SetAsimd("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
+ SetAsimd("1111001x1x>>>xxxxxxx0001>xx1xxxx", InstName.Vsra, InstEmit32.Vsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
+ SetAsimd("111101001x00xxxxxxxx0000xxx0xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x00xxxxxxxx0100xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x00xxxxxxxx1000x000xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x00xxxxxxxx1000x011xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101000x00xxxxxxxx0111xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1.
+ SetAsimd("111101000x00xxxxxxxx1010xx<<xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 2.
+ SetAsimd("111101000x00xxxxxxxx0110xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 3.
+ SetAsimd("111101000x00xxxxxxxx0010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 4.
+ SetAsimd("111101001x00xxxxxxxx0x01xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x00xxxxxxxx1001xx0xxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101000x00xxxxxxxx100x<<0xxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1, inc = 1/2 (itype).
+ SetAsimd("111101000x00xxxxxxxx100x<<10xxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1, inc = 1/2 (itype).
+ SetAsimd("111101000x00xxxxxxxx0011<<xxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 2, inc = 2.
+ SetAsimd("111101001x00xxxxxxxx0x10xxx0xxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x00xxxxxxxx1010xx00xxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101000x00xxxxxxxx010x<<0xxxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Inc = 1/2 (itype).
+ SetAsimd("111101001x00xxxxxxxx0x11xxxxxxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x00xxxxxxxx1011xx<<xxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101000x00xxxxxxxx000x<<xxxxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Inc = 1/2 (itype).
+ SetAsimd("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100100x10xxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x1x<<xxxxxxx00010x0x0xxxx", InstName.Vsubl, InstEmit32.Vsubl_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32);
+ SetAsimd("1111001x1x<<xxxxxxx00011x0x0xxxx", InstName.Vsubw, InstEmit32.Vsubw_I, OpCode32SimdRegWide.Create, OpCode32SimdRegWide.CreateT32);
+ SetAsimd("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl, InstEmit32.Vtbl, OpCode32SimdTbl.Create, OpCode32SimdTbl.CreateT32);
+ SetAsimd("111100111x11<<10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100100x<<xxxxxxxx1000xxx1xxxx", InstName.Vtst, InstEmit32.Vtst, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x11<<10xxxx00010xx0xxxx", InstName.Vuzp, InstEmit32.Vuzp, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+ SetAsimd("111100111x11<<10xxxx00011xx0xxxx", InstName.Vzip, InstEmit32.Vzip, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
+#endregion
+
+#region "OpCode Table (AArch32, T16)"
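+ // T16 encodings are 16 bits wide; SetT16 pads them to 32-bit patterns so they share the T32 table (see below).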
+ SetT16("000<<xxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16ShiftImm.Create);
+ SetT16("0001100xxxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT16AddSubReg.Create);
+ SetT16("0001101xxxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT16AddSubReg.Create);
+ SetT16("0001110xxxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT16AddSubImm3.Create);
+ SetT16("0001111xxxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT16AddSubImm3.Create);
+ SetT16("00100xxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16AluImm8.Create);
+ SetT16("00101xxxxxxxxxxx", InstName.Cmp, InstEmit32.Cmp, OpCodeT16AluImm8.Create);
+ SetT16("00110xxxxxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT16AluImm8.Create);
+ SetT16("00111xxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT16AluImm8.Create);
+ SetT16("0100000000xxxxxx", InstName.And, InstEmit32.And, OpCodeT16AluRegLow.Create);
+ SetT16("0100000001xxxxxx", InstName.Eor, InstEmit32.Eor, OpCodeT16AluRegLow.Create);
+ SetT16("0100000010xxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16ShiftReg.Create);
+ SetT16("0100000011xxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16ShiftReg.Create);
+ SetT16("0100000100xxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16ShiftReg.Create);
+ SetT16("0100000101xxxxxx", InstName.Adc, InstEmit32.Adc, OpCodeT16AluRegLow.Create);
+ SetT16("0100000110xxxxxx", InstName.Sbc, InstEmit32.Sbc, OpCodeT16AluRegLow.Create);
+ SetT16("0100000111xxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16ShiftReg.Create);
+ SetT16("0100001000xxxxxx", InstName.Tst, InstEmit32.Tst, OpCodeT16AluRegLow.Create);
+ SetT16("0100001001xxxxxx", InstName.Rsb, InstEmit32.Rsb, OpCodeT16AluImmZero.Create);
+ SetT16("0100001010xxxxxx", InstName.Cmp, InstEmit32.Cmp, OpCodeT16AluRegLow.Create);
+ SetT16("0100001011xxxxxx", InstName.Cmn, InstEmit32.Cmn, OpCodeT16AluRegLow.Create);
+ SetT16("0100001100xxxxxx", InstName.Orr, InstEmit32.Orr, OpCodeT16AluRegLow.Create);
+ SetT16("0100001101xxxxxx", InstName.Mul, InstEmit32.Mul, OpCodeT16AluRegLow.Create);
+ SetT16("0100001110xxxxxx", InstName.Bic, InstEmit32.Bic, OpCodeT16AluRegLow.Create);
+ SetT16("0100001111xxxxxx", InstName.Mvn, InstEmit32.Mvn, OpCodeT16AluRegLow.Create);
+ SetT16("01000100xxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT16AluRegHigh.Create);
+ SetT16("01000101xxxxxxxx", InstName.Cmp, InstEmit32.Cmp, OpCodeT16AluRegHigh.Create);
+ SetT16("01000110xxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16AluRegHigh.Create);
+ SetT16("010001110xxxx000", InstName.Bx, InstEmit32.Bx, OpCodeT16BReg.Create);
+ SetT16("010001111xxxx000", InstName.Blx, InstEmit32.Blxr, OpCodeT16BReg.Create);
+ SetT16("01001xxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT16MemLit.Create);
+ SetT16("0101000xxxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT16MemReg.Create);
+ SetT16("0101001xxxxxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT16MemReg.Create);
+ SetT16("0101010xxxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT16MemReg.Create);
+ SetT16("0101011xxxxxxxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCodeT16MemReg.Create);
+ SetT16("0101100xxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT16MemReg.Create);
+ SetT16("0101101xxxxxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT16MemReg.Create);
+ SetT16("0101110xxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT16MemReg.Create);
+ SetT16("0101111xxxxxxxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCodeT16MemReg.Create);
+ SetT16("01100xxxxxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT16MemImm5.Create);
+ SetT16("01101xxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT16MemImm5.Create);
+ SetT16("01110xxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT16MemImm5.Create);
+ SetT16("01111xxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT16MemImm5.Create);
+ SetT16("10000xxxxxxxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT16MemImm5.Create);
+ SetT16("10001xxxxxxxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT16MemImm5.Create);
+ SetT16("10010xxxxxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT16MemSp.Create);
+ SetT16("10011xxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT16MemSp.Create);
+ SetT16("10100xxxxxxxxxxx", InstName.Adr, InstEmit32.Adr, OpCodeT16Adr.Create);
+ SetT16("10101xxxxxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT16SpRel.Create);
+ SetT16("101100000xxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT16AddSubSp.Create);
+ SetT16("101100001xxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT16AddSubSp.Create);
+ SetT16("1011001000xxxxxx", InstName.Sxth, InstEmit32.Sxth, OpCodeT16AluUx.Create);
+ SetT16("1011001001xxxxxx", InstName.Sxtb, InstEmit32.Sxtb, OpCodeT16AluUx.Create);
+ SetT16("1011001010xxxxxx", InstName.Uxth, InstEmit32.Uxth, OpCodeT16AluUx.Create);
+ SetT16("1011001011xxxxxx", InstName.Uxtb, InstEmit32.Uxtb, OpCodeT16AluUx.Create);
+ SetT16("101100x1xxxxxxxx", InstName.Cbz, InstEmit32.Cbz, OpCodeT16BImmCmp.Create);
+ SetT16("1011010xxxxxxxxx", InstName.Push, InstEmit32.Stm, OpCodeT16MemStack.Create);
+ SetT16("1011101000xxxxxx", InstName.Rev, InstEmit32.Rev, OpCodeT16AluRegLow.Create);
+ SetT16("1011101001xxxxxx", InstName.Rev16, InstEmit32.Rev16, OpCodeT16AluRegLow.Create);
+ SetT16("1011101011xxxxxx", InstName.Revsh, InstEmit32.Revsh, OpCodeT16AluRegLow.Create);
+ SetT16("101110x1xxxxxxxx", InstName.Cbnz, InstEmit32.Cbnz, OpCodeT16BImmCmp.Create);
+ SetT16("1011110xxxxxxxxx", InstName.Pop, InstEmit32.Ldm, OpCodeT16MemStack.Create);
+ SetT16("1011111100000000", InstName.Nop, InstEmit32.Nop, OpCodeT16.Create);
+ SetT16("1011111100010000", InstName.Yield, InstEmit32.Nop, OpCodeT16.Create);
+ SetT16("1011111100100000", InstName.Wfe, InstEmit32.Nop, OpCodeT16.Create);
+ SetT16("1011111100110000", InstName.Wfi, InstEmit32.Nop, OpCodeT16.Create);
+ SetT16("1011111101000000", InstName.Sev, InstEmit32.Nop, OpCodeT16.Create);
+ SetT16("1011111101010000", InstName.Sevl, InstEmit32.Nop, OpCodeT16.Create);
+ SetT16("10111111011x0000", InstName.Hint, InstEmit32.Nop, OpCodeT16.Create); // Hint instruction
+ SetT16("101111111xxx0000", InstName.Hint, InstEmit32.Nop, OpCodeT16.Create); // Hint instruction
+ SetT16("10111111xxxx>>>>", InstName.It, InstEmit32.It, OpCodeT16IfThen.Create);
+ SetT16("11000xxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, OpCodeT16MemMult.Create);
+ SetT16("11001xxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, OpCodeT16MemMult.Create);
+ SetT16("1101<<<xxxxxxxxx", InstName.B, InstEmit32.B, OpCodeT16BImm8.Create);
+ SetT16("11011111xxxxxxxx", InstName.Svc, InstEmit32.Svc, OpCodeT16Exception.Create);
+ SetT16("11100xxxxxxxxxxx", InstName.B, InstEmit32.B, OpCodeT16BImm11.Create);
+#endregion
+
+#region "OpCode Table (AArch32, T32)"
+ // Base
+ SetT32("11101011010xxxxx0xxxxxxxxxxxxxxx", InstName.Adc, InstEmit32.Adc, OpCodeT32AluRsImm.Create);
+ SetT32("11110x01010xxxxx0xxxxxxxxxxxxxxx", InstName.Adc, InstEmit32.Adc, OpCodeT32AluImm.Create);
+ SetT32("11101011000<xxxx0xxx<<<<xxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT32AluRsImm.Create);
+ SetT32("11110x01000<xxxx0xxx<<<<xxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT32AluImm.Create);
+ SetT32("11110x100000xxxx0xxxxxxxxxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT32AluImm12.Create);
+ SetT32("11101010000<xxxx0xxx<<<<xxxxxxxx", InstName.And, InstEmit32.And, OpCodeT32AluRsImm.Create);
+ SetT32("11110x00000<xxxx0xxx<<<<xxxxxxxx", InstName.And, InstEmit32.And, OpCodeT32AluImm.Create);
+ SetT32("11110x<<<xxxxxxx10x0xxxxxxxxxxxx", InstName.B, InstEmit32.B, OpCodeT32BImm20.Create);
+ SetT32("11110xxxxxxxxxxx10x1xxxxxxxxxxxx", InstName.B, InstEmit32.B, OpCodeT32BImm24.Create);
+ SetT32("11110011011011110xxxxxxxxx0xxxxx", InstName.Bfc, InstEmit32.Bfc, OpCodeT32AluBf.Create);
+ SetT32("111100110110<<<<0xxxxxxxxx0xxxxx", InstName.Bfi, InstEmit32.Bfi, OpCodeT32AluBf.Create);
+ SetT32("11101010001xxxxx0xxxxxxxxxxxxxxx", InstName.Bic, InstEmit32.Bic, OpCodeT32AluRsImm.Create);
+ SetT32("11110x00001xxxxx0xxxxxxxxxxxxxxx", InstName.Bic, InstEmit32.Bic, OpCodeT32AluImm.Create);
+ SetT32("11110xxxxxxxxxxx11x1xxxxxxxxxxxx", InstName.Bl, InstEmit32.Bl, OpCodeT32BImm24.Create);
+ SetT32("11110xxxxxxxxxxx11x0xxxxxxxxxxx0", InstName.Blx, InstEmit32.Blx, OpCodeT32BImm24.Create);
+ SetT32("111110101011xxxx1111xxxx1000xxxx", InstName.Clz, InstEmit32.Clz, OpCodeT32AluReg.Create);
+ SetT32("111010110001xxxx0xxx1111xxxxxxxx", InstName.Cmn, InstEmit32.Cmn, OpCodeT32AluRsImm.Create);
+ SetT32("11110x010001xxxx0xxx1111xxxxxxxx", InstName.Cmn, InstEmit32.Cmn, OpCodeT32AluImm.Create);
+ SetT32("111010111011xxxx0xxx1111xxxxxxxx", InstName.Cmp, InstEmit32.Cmp, OpCodeT32AluRsImm.Create);
+ SetT32("11110x011011xxxx0xxx1111xxxxxxxx", InstName.Cmp, InstEmit32.Cmp, OpCodeT32AluImm.Create);
+ SetT32("11110011101011111000000000010100", InstName.Csdb, InstEmit32.Csdb, OpCodeT32.Create);
+ SetT32("11101010100<xxxx0xxx<<<<xxxxxxxx", InstName.Eor, InstEmit32.Eor, OpCodeT32AluRsImm.Create);
+ SetT32("11110x00100<xxxx0xxx<<<<xxxxxxxx", InstName.Eor, InstEmit32.Eor, OpCodeT32AluImm.Create);
+ SetT32("11110011101011111000000000010000", InstName.Esb, InstEmit32.Nop, OpCodeT32.Create); // Error Synchronization Barrier (FEAT_RAS)
+ SetT32("1111001110101111100000000000011x", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint
+ SetT32("11110011101011111000000000001xxx", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint
+ SetT32("11110011101011111000000000010001", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint
+ SetT32("11110011101011111000000000010011", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint
+ SetT32("11110011101011111000000000010101", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint
+ SetT32("1111001110101111100000000001011x", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint
+ SetT32("11110011101011111000000000011xxx", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint
+ SetT32("111100111010111110000000001xxxxx", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint
+ SetT32("11110011101011111000000001xxxxxx", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint
+ SetT32("1111001110101111100000001xxxxxxx", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint
+ SetT32("111010001101xxxxxxxx111110101111", InstName.Lda, InstEmit32.Lda, OpCodeT32MemLdEx.Create);
+ SetT32("111010001101xxxxxxxx111110001111", InstName.Ldab, InstEmit32.Ldab, OpCodeT32MemLdEx.Create);
+ SetT32("111010001101xxxxxxxx111111101111", InstName.Ldaex, InstEmit32.Ldaex, OpCodeT32MemLdEx.Create);
+ SetT32("111010001101xxxxxxxx111111001111", InstName.Ldaexb, InstEmit32.Ldaexb, OpCodeT32MemLdEx.Create);
+ SetT32("111010001101xxxxxxxxxxxx11111111", InstName.Ldaexd, InstEmit32.Ldaexd, OpCodeT32MemLdEx.Create);
+ SetT32("111010001101xxxxxxxx111111011111", InstName.Ldaexh, InstEmit32.Ldaexh, OpCodeT32MemLdEx.Create);
+ SetT32("111010001101xxxxxxxx111110011111", InstName.Ldah, InstEmit32.Ldah, OpCodeT32MemLdEx.Create);
+ SetT32("1110100010x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, OpCodeT32MemMult.Create);
+ SetT32("1110100100x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, OpCodeT32MemMult.Create);
+ SetT32("111110000101xxxxxxxx10x1xxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT32MemImm8.Create);
+ SetT32("111110000101xxxxxxxx1100xxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT32MemImm8.Create);
+ SetT32("111110000101xxxxxxxx11x1xxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT32MemImm8.Create);
+ SetT32("111110001101xxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT32MemImm12.Create);
+ SetT32("111110000101<<<<xxxx000000xxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT32MemRsImm.Create);
+ SetT32("111110000001xxxxxxxx10x1xxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT32MemImm8.Create);
+ SetT32("111110000001xxxx<<<<1100xxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT32MemImm8.Create);
+ SetT32("111110000001xxxxxxxx11x1xxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT32MemImm8.Create);
+ SetT32("111110001001xxxx<<<<xxxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT32MemImm12.Create);
+ SetT32("111110000001xxxx<<<<000000xxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT32MemRsImm.Create);
+ SetT32("11101000x111<<<<xxxxxxxxxxxxxxxx", InstName.Ldrd, InstEmit32.Ldrd, OpCodeT32MemImm8D.Create);
+ SetT32("11101001x1x1<<<<xxxxxxxxxxxxxxxx", InstName.Ldrd, InstEmit32.Ldrd, OpCodeT32MemImm8D.Create);
+ SetT32("111110000011xxxxxxxx10x1xxxxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT32MemImm8.Create);
+ SetT32("111110000011xxxx<<<<1100xxxxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT32MemImm8.Create);
+ SetT32("111110000011xxxxxxxx11x1xxxxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT32MemImm8.Create);
+ SetT32("111110001011xxxx<<<<xxxxxxxxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT32MemImm12.Create);
+ SetT32("111110000011xxxx<<<<000000xxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT32MemRsImm.Create);
+ SetT32("111110010001xxxxxxxx10x1xxxxxxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCodeT32MemImm8.Create);
+ SetT32("111110010001xxxx<<<<1100xxxxxxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCodeT32MemImm8.Create);
+ SetT32("111110010001xxxxxxxx11x1xxxxxxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCodeT32MemImm8.Create);
+ SetT32("111110011001xxxx<<<<xxxxxxxxxxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCodeT32MemImm12.Create);
+ SetT32("111110010001xxxx<<<<000000xxxxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCodeT32MemRsImm.Create);
+ SetT32("111110010011xxxxxxxx10x1xxxxxxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCodeT32MemImm8.Create);
+ SetT32("111110010011xxxx<<<<1100xxxxxxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCodeT32MemImm8.Create);
+ SetT32("111110010011xxxxxxxx11x1xxxxxxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCodeT32MemImm8.Create);
+ SetT32("111110011011xxxx<<<<xxxxxxxxxxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCodeT32MemImm12.Create);
+ SetT32("111110010011xxxx<<<<000000xxxxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCodeT32MemRsImm.Create);
+ SetT32("111110110000xxxx<<<<xxxx0000xxxx", InstName.Mla, InstEmit32.Mla, OpCodeT32AluMla.Create);
+ SetT32("111110110000xxxxxxxxxxxx0001xxxx", InstName.Mls, InstEmit32.Mls, OpCodeT32AluMla.Create);
+ SetT32("11101010010x11110xxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT32AluRsImm.Create);
+ SetT32("111110100xxxxxxx1111xxxx0000xxxx", InstName.Mov, InstEmit32.Mov, OpCodeT32ShiftReg.Create);
+ SetT32("11110x00010x11110xxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT32AluImm.Create);
+ SetT32("11110x100100xxxx0xxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT32MovImm16.Create);
+ SetT32("11110x101100xxxx0xxxxxxxxxxxxxxx", InstName.Movt, InstEmit32.Movt, OpCodeT32MovImm16.Create);
+ SetT32("111110110000xxxx1111xxxx0000xxxx", InstName.Mul, InstEmit32.Mul, OpCodeT32AluMla.Create);
+ SetT32("11101010011x11110xxxxxxxxxxxxxxx", InstName.Mvn, InstEmit32.Mvn, OpCodeT32AluRsImm.Create);
+ SetT32("11110x00011x11110xxxxxxxxxxxxxxx", InstName.Mvn, InstEmit32.Mvn, OpCodeT32AluImm.Create);
+ SetT32("11110011101011111000000000000000", InstName.Nop, InstEmit32.Nop, OpCodeT32.Create);
+ SetT32("11101010011x<<<<0xxxxxxxxxxxxxxx", InstName.Orn, InstEmit32.Orn, OpCodeT32AluRsImm.Create);
+ SetT32("11110x00011x<<<<0xxxxxxxxxxxxxxx", InstName.Orn, InstEmit32.Orn, OpCodeT32AluImm.Create);
+ SetT32("11101010010x<<<<0xxxxxxxxxxxxxxx", InstName.Orr, InstEmit32.Orr, OpCodeT32AluRsImm.Create);
+ SetT32("11110x00010x<<<<0xxxxxxxxxxxxxxx", InstName.Orr, InstEmit32.Orr, OpCodeT32AluImm.Create);
+ SetT32("1111100010x1xxxx1111xxxxxxxxxxxx", InstName.Pld, InstEmit32.Nop, OpCodeT32.Create);
+ SetT32("1111100000x1xxxx11111100xxxxxxxx", InstName.Pld, InstEmit32.Nop, OpCodeT32.Create);
+ SetT32("1111100000x1xxxx1111000000xxxxxx", InstName.Pld, InstEmit32.Nop, OpCodeT32.Create);
+ SetT32("11101011110xxxxx0xxxxxxxxxxxxxxx", InstName.Rsb, InstEmit32.Rsb, OpCodeT32AluRsImm.Create);
+ SetT32("11110x01110xxxxx0xxxxxxxxxxxxxxx", InstName.Rsb, InstEmit32.Rsb, OpCodeT32AluImm.Create);
+ SetT32("111110101000xxxx1111xxxx0000xxxx", InstName.Sadd8, InstEmit32.Sadd8, OpCodeT32AluReg.Create);
+ SetT32("11101011011xxxxx0xxxxxxxxxxxxxxx", InstName.Sbc, InstEmit32.Sbc, OpCodeT32AluRsImm.Create);
+ SetT32("11110x01011xxxxx0xxxxxxxxxxxxxxx", InstName.Sbc, InstEmit32.Sbc, OpCodeT32AluImm.Create);
+ SetT32("111100110100xxxx0xxxxxxxxx0xxxxx", InstName.Sbfx, InstEmit32.Sbfx, OpCodeT32AluBf.Create);
+ SetT32("111110111001xxxx1111xxxx1111xxxx", InstName.Sdiv, InstEmit32.Sdiv, OpCodeT32AluMla.Create);
+ SetT32("111110101010xxxx1111xxxx1000xxxx", InstName.Sel, InstEmit32.Sel, OpCodeT32AluReg.Create);
+ SetT32("111110101000xxxx1111xxxx0010xxxx", InstName.Shadd8, InstEmit32.Shadd8, OpCodeT32AluReg.Create);
+ SetT32("111110101100xxxx1111xxxx0010xxxx", InstName.Shsub8, InstEmit32.Shsub8, OpCodeT32AluReg.Create);
+ SetT32("11110011101011111000000000000100", InstName.Sev, InstEmit32.Nop, OpCodeT32.Create);
+ SetT32("11110011101011111000000000000101", InstName.Sevl, InstEmit32.Nop, OpCodeT32.Create);
+ SetT32("111110110001xxxx<<<<xxxx00xxxxxx", InstName.Smla__, InstEmit32.Smla__, OpCodeT32AluMla.Create);
+ SetT32("111110111100xxxxxxxxxxxx0000xxxx", InstName.Smlal, InstEmit32.Smlal, OpCodeT32AluUmull.Create);
+ SetT32("111110111100xxxxxxxxxxxx10xxxxxx", InstName.Smlal__, InstEmit32.Smlal__, OpCodeT32AluUmull.Create);
+ SetT32("111110110011xxxx<<<<xxxx000xxxxx", InstName.Smlaw_, InstEmit32.Smlaw_, OpCodeT32AluMla.Create);
+ SetT32("111110110101xxxx<<<<xxxx000xxxxx", InstName.Smmla, InstEmit32.Smmla, OpCodeT32AluMla.Create);
+ SetT32("111110110110xxxxxxxxxxxx000xxxxx", InstName.Smmls, InstEmit32.Smmls, OpCodeT32AluMla.Create);
+ SetT32("111110110001xxxx1111xxxx00xxxxxx", InstName.Smul__, InstEmit32.Smul__, OpCodeT32AluMla.Create);
+ SetT32("111110111000xxxxxxxxxxxx0000xxxx", InstName.Smull, InstEmit32.Smull, OpCodeT32AluUmull.Create);
+ SetT32("111110110011xxxx1111xxxx000xxxxx", InstName.Smulw_, InstEmit32.Smulw_, OpCodeT32AluMla.Create);
+ SetT32("111110101100xxxx1111xxxx0000xxxx", InstName.Ssub8, InstEmit32.Ssub8, OpCodeT32AluReg.Create);
+ SetT32("111010001100xxxxxxxx111110101111", InstName.Stl, InstEmit32.Stl, OpCodeT32MemStEx.Create);
+ SetT32("111010001100xxxxxxxx111110001111", InstName.Stlb, InstEmit32.Stlb, OpCodeT32MemStEx.Create);
+ SetT32("111010001100xxxxxxxx11111110xxxx", InstName.Stlex, InstEmit32.Stlex, OpCodeT32MemStEx.Create);
+ SetT32("111010001100xxxxxxxx11111100xxxx", InstName.Stlexb, InstEmit32.Stlexb, OpCodeT32MemStEx.Create);
+ SetT32("111010001100xxxxxxxxxxxx1111xxxx", InstName.Stlexd, InstEmit32.Stlexd, OpCodeT32MemStEx.Create);
+ SetT32("111010001100xxxxxxxx11111101xxxx", InstName.Stlexh, InstEmit32.Stlexh, OpCodeT32MemStEx.Create);
+ SetT32("111010001100xxxxxxxx111110011111", InstName.Stlh, InstEmit32.Stlh, OpCodeT32MemStEx.Create);
+ SetT32("1110100010x0xxxx0xxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, OpCodeT32MemMult.Create);
+ SetT32("1110100100x0xxxx0xxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, OpCodeT32MemMult.Create);
+ SetT32("111110000100<<<<xxxx10x1xxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT32MemImm8.Create);
+ SetT32("111110000100<<<<xxxx1100xxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT32MemImm8.Create);
+ SetT32("111110000100<<<<xxxx11x1xxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT32MemImm8.Create);
+ SetT32("111110001100<<<<xxxxxxxxxxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT32MemImm12.Create);
+ SetT32("111110000100<<<<xxxx000000xxxxxx", InstName.Str, InstEmit32.Str, OpCodeT32MemRsImm.Create);
+ SetT32("111110000000<<<<xxxx10x1xxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT32MemImm8.Create);
+ SetT32("111110000000<<<<xxxx1100xxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT32MemImm8.Create);
+ SetT32("111110000000<<<<xxxx11x1xxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT32MemImm8.Create);
+ SetT32("111110001000<<<<xxxxxxxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT32MemImm12.Create);
+ SetT32("111110000000<<<<xxxx000000xxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT32MemRsImm.Create);
+ SetT32("11101000x110<<<<xxxxxxxxxxxxxxxx", InstName.Strd, InstEmit32.Strd, OpCodeT32MemImm8D.Create);
+ SetT32("11101001x1x0<<<<xxxxxxxxxxxxxxxx", InstName.Strd, InstEmit32.Strd, OpCodeT32MemImm8D.Create);
+ SetT32("111110000010<<<<xxxx10x1xxxxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT32MemImm8.Create);
+ SetT32("111110000010<<<<xxxx1100xxxxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT32MemImm8.Create);
+ SetT32("111110000010<<<<xxxx11x1xxxxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT32MemImm8.Create);
+ SetT32("111110001010<<<<xxxxxxxxxxxxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT32MemImm12.Create);
+ SetT32("111110000010<<<<xxxx000000xxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT32MemRsImm.Create);
+ SetT32("11101011101<xxxx0xxx<<<<xxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT32AluRsImm.Create);
+ SetT32("11110x01101<xxxx0xxx<<<<xxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT32AluImm.Create);
+ SetT32("11110x101010xxxx0xxxxxxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT32AluImm12.Create);
+ SetT32("111110100100xxxx1111xxxx10xxxxxx", InstName.Sxtb, InstEmit32.Sxtb, OpCodeT32AluUx.Create);
+ SetT32("111110100010xxxx1111xxxx10xxxxxx", InstName.Sxtb16, InstEmit32.Sxtb16, OpCodeT32AluUx.Create);
+ SetT32("111110100000xxxx1111xxxx10xxxxxx", InstName.Sxth, InstEmit32.Sxth, OpCodeT32AluUx.Create);
+ SetT32("111010001101xxxx111100000000xxxx", InstName.Tbb, InstEmit32.Tbb, OpCodeT32Tb.Create);
+ SetT32("111010001101xxxx111100000001xxxx", InstName.Tbh, InstEmit32.Tbh, OpCodeT32Tb.Create);
+ SetT32("111010101001xxxx0xxx1111xxxxxxxx", InstName.Teq, InstEmit32.Teq, OpCodeT32AluRsImm.Create);
+ SetT32("11110x001001xxxx0xxx1111xxxxxxxx", InstName.Teq, InstEmit32.Teq, OpCodeT32AluImm.Create);
+ SetT32("11110011101011111000000000010010", InstName.Tsb, InstEmit32.Nop, OpCodeT32.Create); // Trace Synchronization Barrier (FEAT_TRF)
+ SetT32("111010100001xxxx0xxx1111xxxxxxxx", InstName.Tst, InstEmit32.Tst, OpCodeT32AluRsImm.Create);
+ SetT32("11110x000001xxxx0xxx1111xxxxxxxx", InstName.Tst, InstEmit32.Tst, OpCodeT32AluImm.Create);
+ SetT32("111110101000xxxx1111xxxx0100xxxx", InstName.Uadd8, InstEmit32.Uadd8, OpCodeT32AluReg.Create);
+ SetT32("111100111100xxxx0xxxxxxxxx0xxxxx", InstName.Ubfx, InstEmit32.Ubfx, OpCodeT32AluBf.Create);
+ SetT32("111110111011xxxx1111xxxx1111xxxx", InstName.Udiv, InstEmit32.Udiv, OpCodeT32AluMla.Create);
+ SetT32("111110101000xxxx1111xxxx0110xxxx", InstName.Uhadd8, InstEmit32.Uhadd8, OpCodeT32AluReg.Create);
+ SetT32("111110101100xxxx1111xxxx0110xxxx", InstName.Uhsub8, InstEmit32.Uhsub8, OpCodeT32AluReg.Create);
+ SetT32("111110111110xxxxxxxxxxxx0110xxxx", InstName.Umaal, InstEmit32.Umaal, OpCodeT32AluUmull.Create);
+ SetT32("111110111110xxxxxxxxxxxx0000xxxx", InstName.Umlal, InstEmit32.Umlal, OpCodeT32AluUmull.Create);
+ SetT32("111110111010xxxxxxxxxxxx0000xxxx", InstName.Umull, InstEmit32.Umull, OpCodeT32AluUmull.Create);
+ SetT32("111110101100xxxx1111xxxx0100xxxx", InstName.Usub8, InstEmit32.Usub8, OpCodeT32AluReg.Create);
+ SetT32("111110100101xxxx1111xxxx10xxxxxx", InstName.Uxtb, InstEmit32.Uxtb, OpCodeT32AluUx.Create);
+ SetT32("111110100011xxxx1111xxxx10xxxxxx", InstName.Uxtb16, InstEmit32.Uxtb16, OpCodeT32AluUx.Create);
+ SetT32("111110100001xxxx1111xxxx10xxxxxx", InstName.Uxth, InstEmit32.Uxth, OpCodeT32AluUx.Create);
+ SetT32("11110011101011111000000000000010", InstName.Wfe, InstEmit32.Nop, OpCodeT32.Create);
+ SetT32("11110011101011111000000000000011", InstName.Wfi, InstEmit32.Nop, OpCodeT32.Create);
+ SetT32("11110011101011111000000000000001", InstName.Yield, InstEmit32.Nop, OpCodeT32.Create);
+#endregion
+
+ FillFastLookupTable(InstA32FastLookup, AllInstA32, ToFastLookupIndexA);
+ FillFastLookupTable(InstT32FastLookup, AllInstT32, ToFastLookupIndexT);
+ FillFastLookupTable(InstA64FastLookup, AllInstA64, ToFastLookupIndexA);
+ }
+
+ private static void FillFastLookupTable(InstInfo[][] table, List<InstInfo> allInsts, Func<int, int> ToFastLookupIndex)
+ {
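+            // For every possible fast lookup index, collect all instructions whose
+            // mask/value pair could match an opcode hashing to that index. A lookup then
+            // only scans the small bucket instead of the whole instruction list.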
+ List<InstInfo>[] temp = new List<InstInfo>[FastLookupSize];
+
+ for (int index = 0; index < temp.Length; index++)
+ {
+ temp[index] = new List<InstInfo>();
+ }
+
+ foreach (InstInfo inst in allInsts)
+ {
+ int mask = ToFastLookupIndex(inst.Mask);
+ int value = ToFastLookupIndex(inst.Value);
+
+ for (int index = 0; index < temp.Length; index++)
+ {
+ if ((index & mask) == value)
+ {
+ temp[index].Add(inst);
+ }
+ }
+ }
+
+ for (int index = 0; index < temp.Length; index++)
+ {
+ table[index] = temp[index].ToArray();
+ }
+ }
+
+ private static void SetA32(string encoding, InstName name, InstEmitter emitter, MakeOp makeOp)
+ {
+ Set(encoding, AllInstA32, new InstDescriptor(name, emitter), makeOp);
+ }
+
+ private static void SetT16(string encoding, InstName name, InstEmitter emitter, MakeOp makeOp)
+ {
+ encoding = "xxxxxxxxxxxxxxxx" + encoding;
+ Set(encoding, AllInstT32, new InstDescriptor(name, emitter), makeOp);
+ }
+
+ private static void SetT32(string encoding, InstName name, InstEmitter emitter, MakeOp makeOp)
+ {
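+            // The encoding is stored with its two halfwords swapped (first halfword in
+            // the low 16 bits) to match the opcode layout passed to GetInstT32, and the
+            // wrapped MakeOp rotates the opcode so the op code constructors still see
+            // the halfwords in encoding diagram order.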
+ string reversedEncoding = $"{encoding.AsSpan(16)}{encoding.AsSpan(0, 16)}";
+ MakeOp reversedMakeOp =
+ (inst, address, opCode)
+ => makeOp(inst, address, (int)BitOperations.RotateRight((uint)opCode, 16));
+ Set(reversedEncoding, AllInstT32, new InstDescriptor(name, emitter), reversedMakeOp);
+ }
+
+ private static void SetVfp(string encoding, InstName name, InstEmitter emitter, MakeOp makeOpA32, MakeOp makeOpT32)
+ {
+ SetA32(encoding, name, emitter, makeOpA32);
+
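+            // A32 VFP encodings are conditional (condition code in bits 31-28); the
+            // equivalent T32 encodings have those bits fixed to 1110.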
+ string thumbEncoding = encoding;
+ if (thumbEncoding.StartsWith("<<<<"))
+ {
+ thumbEncoding = $"1110{thumbEncoding.AsSpan(4)}";
+ }
+ SetT32(thumbEncoding, name, emitter, makeOpT32);
+ }
+
+ private static void SetAsimd(string encoding, InstName name, InstEmitter emitter, MakeOp makeOpA32, MakeOp makeOpT32)
+ {
+ SetA32(encoding, name, emitter, makeOpA32);
+
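+            // Map the A32 ASIMD prefix to its T32 equivalent: data-processing encodings
+            // move the U bit from bit 24 up to bit 28 (1111001x -> 111x1111), and the
+            // element/structure load/store prefix 11110100 becomes 11111001.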
+ string thumbEncoding = encoding;
+ if (thumbEncoding.StartsWith("11110100"))
+ {
+ thumbEncoding = $"11111001{encoding.AsSpan(8)}";
+ }
+ else if (thumbEncoding.StartsWith("1111001x"))
+ {
+ thumbEncoding = $"111x1111{encoding.AsSpan(8)}";
+ }
+ else if (thumbEncoding.StartsWith("11110010"))
+ {
+ thumbEncoding = $"11101111{encoding.AsSpan(8)}";
+ }
+ else if (thumbEncoding.StartsWith("11110011"))
+ {
+ thumbEncoding = $"11111111{encoding.AsSpan(8)}";
+ }
+ else
+ {
+ throw new ArgumentException("Invalid ASIMD instruction encoding");
+ }
+ SetT32(thumbEncoding, name, emitter, makeOpT32);
+ }
+
+ private static void SetA64(string encoding, InstName name, InstEmitter emitter, MakeOp makeOp)
+ {
+ Set(encoding, AllInstA64, new InstDescriptor(name, emitter), makeOp);
+ }
+
+ private static void Set(string encoding, List<InstInfo> list, InstDescriptor inst, MakeOp makeOp)
+ {
+ int bit = encoding.Length - 1;
+ int value = 0;
+ int xMask = 0;
+ int xBits = 0;
+
+ int[] xPos = new int[encoding.Length];
+
+ int blacklisted = 0;
+
+ for (int index = 0; index < encoding.Length; index++, bit--)
+ {
+                // Note: < and > are used on special encodings.
+                // The < means that the bits marked with '<' must never ALL be set.
+                // So, when the encoding has <<, the values 00, 01 and 10 are valid,
+                // but 11 is not. <<< allows 000, 001, ..., 110 but NOT 111, and so on.
+                // For >, the invalid value is zero. So, for >>, the values 01, 10 and 11
+                // are valid, but 00 is not.
+                // When an encoding mixes '<' and '>' bits, only the single combination
+                // with all '<' bits set and all '>' bits clear is rejected (see the
+                // blacklist check below).
+ char chr = encoding[index];
+
+ if (chr == '1')
+ {
+ value |= 1 << bit;
+ }
+ else if (chr == 'x')
+ {
+ xMask |= 1 << bit;
+ }
+ else if (chr == '>')
+ {
+ xPos[xBits++] = bit;
+ }
+ else if (chr == '<')
+ {
+ xPos[xBits++] = bit;
+
+ blacklisted |= 1 << bit;
+ }
+ else if (chr != '0')
+ {
+                throw new ArgumentException($"Invalid encoding character '{chr}'.", nameof(encoding));
+ }
+ }
+
+ xMask = ~xMask;
+
+ if (xBits == 0)
+ {
+ list.Add(new InstInfo(xMask, value, inst, makeOp));
+
+ return;
+ }
+
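+            // Enumerate every combination of the '<'/'>' wildcard bits, skipping the
+            // single blacklisted combination.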
+ for (int index = 0; index < (1 << xBits); index++)
+ {
+ int mask = 0;
+
+ for (int x = 0; x < xBits; x++)
+ {
+ mask |= ((index >> x) & 1) << xPos[x];
+ }
+
+ if (mask != blacklisted)
+ {
+ list.Add(new InstInfo(xMask, value | mask, inst, makeOp));
+ }
+ }
+ }
+
+ public static (InstDescriptor inst, MakeOp makeOp) GetInstA32(int opCode)
+ {
+ return GetInstFromList(InstA32FastLookup[ToFastLookupIndexA(opCode)], opCode);
+ }
+
+ public static (InstDescriptor inst, MakeOp makeOp) GetInstT32(int opCode)
+ {
+ return GetInstFromList(InstT32FastLookup[ToFastLookupIndexT(opCode)], opCode);
+ }
+
+ public static (InstDescriptor inst, MakeOp makeOp) GetInstA64(int opCode)
+ {
+ return GetInstFromList(InstA64FastLookup[ToFastLookupIndexA(opCode)], opCode);
+ }
+
+ private static (InstDescriptor inst, MakeOp makeOp) GetInstFromList(InstInfo[] insts, int opCode)
+ {
+ foreach (InstInfo info in insts)
+ {
+ if ((opCode & info.Mask) == info.Value)
+ {
+ return (info.Inst, info.MakeOp);
+ }
+ }
+
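+            // No match: treat the opcode as undefined.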
+ return (new InstDescriptor(InstName.Und, InstEmit.Und), null);
+ }
+
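+        // The fast lookup index is a 12-bit hash of the opcode: bits [13:10] and [29:22]
+        // for A32/A64, bits [15:4] for T32. Any choice of bits would be correct, since
+        // FillFastLookupTable fills buckets conservatively; these bits are presumably
+        // chosen because they discriminate well between encodings.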
+ private static int ToFastLookupIndexA(int value)
+ {
+ return ((value >> 10) & 0x00F) | ((value >> 18) & 0xFF0);
+ }
+
+ private static int ToFastLookupIndexT(int value)
+ {
+ return (value >> 4) & 0xFFF;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs b/src/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs
new file mode 100644
index 00000000..17c17812
--- /dev/null
+++ b/src/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs
@@ -0,0 +1,88 @@
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Decoders.Optimizations
+{
+ static class TailCallRemover
+ {
+ public static Block[] RunPass(ulong entryAddress, List<Block> blocks)
+ {
+ // Detect tail calls:
+ // - Assume this function spans the space covered by contiguous code blocks surrounding the entry address.
+ // - A jump to an area outside this contiguous region will be treated as an exit block.
+ // - Include a small allowance for jumps outside the contiguous range.
+
+ if (!Decoder.BinarySearch(blocks, entryAddress, out int entryBlockId))
+ {
+ throw new InvalidOperationException("Function entry point is not contained in a block.");
+ }
+
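+            // Tolerate a gap of up to 4 bytes between otherwise contiguous blocks.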
+ const ulong allowance = 4;
+
+ Block entryBlock = blocks[entryBlockId];
+
+ Block startBlock = entryBlock;
+ Block endBlock = entryBlock;
+
+ int startBlockIndex = entryBlockId;
+ int endBlockIndex = entryBlockId;
+
+ for (int i = entryBlockId + 1; i < blocks.Count; i++) // Search forwards.
+ {
+ Block block = blocks[i];
+
+ if (endBlock.EndAddress < block.Address - allowance)
+ {
+ break; // End of contiguous function.
+ }
+
+ endBlock = block;
+ endBlockIndex = i;
+ }
+
+ for (int i = entryBlockId - 1; i >= 0; i--) // Search backwards.
+ {
+ Block block = blocks[i];
+
+ if (startBlock.Address > block.EndAddress + allowance)
+ {
+ break; // End of contiguous function.
+ }
+
+ startBlock = block;
+ startBlockIndex = i;
+ }
+
+ if (startBlockIndex == 0 && endBlockIndex == blocks.Count - 1)
+ {
+ return blocks.ToArray(); // Nothing to do here.
+ }
+
+            // Mark blocks targeted by branches that leave the contiguous region as exit blocks.
+ for (int i = startBlockIndex; i <= endBlockIndex; i++)
+ {
+ Block block = blocks[i];
+
+ if (block.Branch != null && (block.Branch.Address > endBlock.EndAddress || block.Branch.EndAddress < startBlock.Address))
+ {
+ block.Branch.Exit = true;
+ }
+ }
+
+ var newBlocks = new List<Block>(blocks.Count);
+
+ // Finally, rebuild decoded block list, ignoring blocks outside the contiguous range.
+ for (int i = 0; i < blocks.Count; i++)
+ {
+ Block block = blocks[i];
+
+ if (block.Exit || (i >= startBlockIndex && i <= endBlockIndex))
+ {
+ newBlocks.Add(block);
+ }
+ }
+
+ return newBlocks.ToArray();
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/RegisterSize.cs b/src/ARMeilleure/Decoders/RegisterSize.cs
new file mode 100644
index 00000000..c9cea03e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/RegisterSize.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ enum RegisterSize
+ {
+ Int32,
+ Int64,
+ Simd64,
+ Simd128
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/ShiftType.cs b/src/ARMeilleure/Decoders/ShiftType.cs
new file mode 100644
index 00000000..8583f16a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/ShiftType.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ enum ShiftType
+ {
+ Lsl = 0,
+ Lsr = 1,
+ Asr = 2,
+ Ror = 3
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Diagnostics/IRDumper.cs b/src/ARMeilleure/Diagnostics/IRDumper.cs
new file mode 100644
index 00000000..3d1a60e5
--- /dev/null
+++ b/src/ARMeilleure/Diagnostics/IRDumper.cs
@@ -0,0 +1,311 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ARMeilleure.Diagnostics
+{
+ class IRDumper
+ {
+ private const string Indentation = " ";
+
+ private int _indentLevel;
+
+ private readonly StringBuilder _builder;
+
+ private readonly Dictionary<Operand, string> _localNames;
+ private readonly Dictionary<ulong, string> _symbolNames;
+
+ public IRDumper(int indent)
+ {
+ _indentLevel = indent;
+
+ _builder = new StringBuilder();
+
+ _localNames = new Dictionary<Operand, string>();
+ _symbolNames = new Dictionary<ulong, string>();
+ }
+
+ private void Indent()
+ {
+ _builder.EnsureCapacity(_builder.Capacity + _indentLevel * Indentation.Length);
+
+ for (int index = 0; index < _indentLevel; index++)
+ {
+ _builder.Append(Indentation);
+ }
+ }
+
+ private void IncreaseIndentation()
+ {
+ _indentLevel++;
+ }
+
+ private void DecreaseIndentation()
+ {
+ _indentLevel--;
+ }
+
+ private void DumpBlockName(BasicBlock block)
+ {
+ _builder.Append("block").Append(block.Index);
+ }
+
+ private void DumpBlockHeader(BasicBlock block)
+ {
+ DumpBlockName(block);
+
+ if (block.Frequency == BasicBlockFrequency.Cold)
+ {
+ _builder.Append(" cold");
+ }
+
+ if (block.SuccessorsCount > 0)
+ {
+ _builder.Append(" (");
+
+ for (int i = 0; i < block.SuccessorsCount; i++)
+ {
+ DumpBlockName(block.GetSuccessor(i));
+
+ if (i < block.SuccessorsCount - 1)
+ {
+ _builder.Append(", ");
+ }
+ }
+
+ _builder.Append(')');
+ }
+
+ _builder.Append(':');
+ }
+
+ private void DumpOperand(Operand operand)
+ {
+ if (operand == default)
+ {
+ _builder.Append("<NULL>");
+ return;
+ }
+
+ _builder.Append(GetTypeName(operand.Type)).Append(' ');
+
+ switch (operand.Kind)
+ {
+ case OperandKind.LocalVariable:
+ if (!_localNames.TryGetValue(operand, out string localName))
+ {
+ localName = $"%{_localNames.Count}";
+
+ _localNames.Add(operand, localName);
+ }
+
+ _builder.Append(localName);
+ break;
+
+ case OperandKind.Register:
+ Register reg = operand.GetRegister();
+
+ switch (reg.Type)
+ {
+ case RegisterType.Flag: _builder.Append('b'); break;
+ case RegisterType.FpFlag: _builder.Append('f'); break;
+ case RegisterType.Integer: _builder.Append('r'); break;
+ case RegisterType.Vector: _builder.Append('v'); break;
+ }
+
+ _builder.Append(reg.Index);
+ break;
+
+ case OperandKind.Constant:
+ string symbolName = Symbols.Get(operand.Value);
+
+ if (symbolName != null && !_symbolNames.ContainsKey(operand.Value))
+ {
+ _symbolNames.Add(operand.Value, symbolName);
+ }
+
+ _builder.Append("0x").Append(operand.Value.ToString("X"));
+ break;
+
+ case OperandKind.Memory:
+ var memOp = operand.GetMemory();
+
+ _builder.Append('[');
+
+ DumpOperand(memOp.BaseAddress);
+
+ if (memOp.Index != default)
+ {
+ _builder.Append(" + ");
+
+ DumpOperand(memOp.Index);
+
+ switch (memOp.Scale)
+ {
+ case Multiplier.x2: _builder.Append("*2"); break;
+ case Multiplier.x4: _builder.Append("*4"); break;
+ case Multiplier.x8: _builder.Append("*8"); break;
+ }
+ }
+
+ if (memOp.Displacement != 0)
+ {
+ _builder.Append(" + 0x").Append(memOp.Displacement.ToString("X"));
+ }
+
+ _builder.Append(']');
+ break;
+
+ default:
+ _builder.Append(operand.Type);
+ break;
+ }
+ }
+
+ private void DumpNode(ControlFlowGraph cfg, Operation node)
+ {
+ for (int index = 0; index < node.DestinationsCount; index++)
+ {
+ DumpOperand(node.GetDestination(index));
+
+ if (index == node.DestinationsCount - 1)
+ {
+ _builder.Append(" = ");
+ }
+ else
+ {
+ _builder.Append(", ");
+ }
+ }
+
+ switch (node)
+ {
+ case Operation operation:
+ if (operation.Instruction == Instruction.Phi)
+ {
+ PhiOperation phi = operation.AsPhi();
+
+ _builder.Append("Phi ");
+
+ for (int index = 0; index < phi.SourcesCount; index++)
+ {
+ _builder.Append('(');
+
+ DumpBlockName(phi.GetBlock(cfg, index));
+
+ _builder.Append(": ");
+
+ DumpOperand(phi.GetSource(index));
+
+ _builder.Append(')');
+
+ if (index < phi.SourcesCount - 1)
+ {
+ _builder.Append(", ");
+ }
+ }
+
+ break;
+ }
+
+ bool comparison = false;
+
+ _builder.Append(operation.Instruction);
+
+ if (operation.Instruction == Instruction.Extended)
+ {
+ _builder.Append('.').Append(operation.Intrinsic);
+ }
+ else if (operation.Instruction == Instruction.BranchIf ||
+ operation.Instruction == Instruction.Compare)
+ {
+ comparison = true;
+ }
+
+ _builder.Append(' ');
+
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ Operand source = operation.GetSource(index);
+
+ if (index < operation.SourcesCount - 1)
+ {
+ DumpOperand(source);
+
+ _builder.Append(", ");
+ }
+ else if (comparison)
+ {
+ _builder.Append((Comparison)source.AsInt32());
+ }
+ else
+ {
+ DumpOperand(source);
+ }
+ }
+ break;
+ }
+
+ if (_symbolNames.Count == 1)
+ {
+ _builder.Append(" ;; ").Append(_symbolNames.First().Value);
+ }
+ else if (_symbolNames.Count > 1)
+ {
+ _builder.Append(" ;;");
+
+ foreach ((ulong value, string name) in _symbolNames)
+ {
+ _builder.Append(" 0x").Append(value.ToString("X")).Append(" = ").Append(name);
+ }
+ }
+
+ // Reset the set of symbols for the next Node we're going to dump.
+ _symbolNames.Clear();
+ }
+
+ public static string GetDump(ControlFlowGraph cfg)
+ {
+ var dumper = new IRDumper(1);
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ dumper.Indent();
+ dumper.DumpBlockHeader(block);
+
+ dumper._builder.AppendLine();
+
+ dumper.IncreaseIndentation();
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ dumper.Indent();
+ dumper.DumpNode(cfg, node);
+
+ dumper._builder.AppendLine();
+ }
+
+ dumper.DecreaseIndentation();
+ }
+
+ return dumper._builder.ToString();
+ }
+
+ private static string GetTypeName(OperandType type)
+ {
+ return type switch
+ {
+ OperandType.None => "none",
+ OperandType.I32 => "i32",
+ OperandType.I64 => "i64",
+ OperandType.FP32 => "f32",
+ OperandType.FP64 => "f64",
+ OperandType.V128 => "v128",
+ _ => throw new ArgumentException($"Invalid operand type \"{type}\"."),
+ };
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Diagnostics/Logger.cs b/src/ARMeilleure/Diagnostics/Logger.cs
new file mode 100644
index 00000000..07a60667
--- /dev/null
+++ b/src/ARMeilleure/Diagnostics/Logger.cs
@@ -0,0 +1,56 @@
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.Diagnostics
+{
+ static class Logger
+ {
+ private static long _startTime;
+
+ private static long[] _accumulatedTime;
+
+ static Logger()
+ {
+ _accumulatedTime = new long[(int)PassName.Count];
+ }
+
+ [Conditional("M_DEBUG")]
+ public static void StartPass(PassName name)
+ {
+ WriteOutput(name + " pass started...");
+
+ _startTime = Stopwatch.GetTimestamp();
+ }
+
+ [Conditional("M_DEBUG")]
+ public static void EndPass(PassName name, ControlFlowGraph cfg)
+ {
+ EndPass(name);
+
+ WriteOutput("IR after " + name + " pass:");
+
+ WriteOutput(IRDumper.GetDump(cfg));
+ }
+
+ [Conditional("M_DEBUG")]
+ public static void EndPass(PassName name)
+ {
+ long elapsedTime = Stopwatch.GetTimestamp() - _startTime;
+
+ _accumulatedTime[(int)name] += elapsedTime;
+
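+            // Note: the reported duration is accumulated across all runs of this pass,
+            // not just the most recent one.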
+ WriteOutput($"{name} pass ended after {GetMilliseconds(_accumulatedTime[(int)name])} ms...");
+ }
+
+ private static long GetMilliseconds(long ticks)
+ {
+ return (long)(((double)ticks / Stopwatch.Frequency) * 1000);
+ }
+
+ private static void WriteOutput(string text)
+ {
+ Console.WriteLine(text);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Diagnostics/PassName.cs b/src/ARMeilleure/Diagnostics/PassName.cs
new file mode 100644
index 00000000..e34bf0d2
--- /dev/null
+++ b/src/ARMeilleure/Diagnostics/PassName.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Diagnostics
+{
+ enum PassName
+ {
+ Decoding,
+ Translation,
+ RegisterUsage,
+ TailMerge,
+ Dominance,
+ SsaConstruction,
+ RegisterToLocal,
+ Optimization,
+ PreAllocation,
+ RegisterAllocation,
+ CodeGeneration,
+
+ Count
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Diagnostics/Symbols.cs b/src/ARMeilleure/Diagnostics/Symbols.cs
new file mode 100644
index 00000000..6bde62f5
--- /dev/null
+++ b/src/ARMeilleure/Diagnostics/Symbols.cs
@@ -0,0 +1,84 @@
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace ARMeilleure.Diagnostics
+{
+ static class Symbols
+ {
+ private readonly struct RangedSymbol
+ {
+ public readonly ulong Start;
+ public readonly ulong End;
+ public readonly ulong ElementSize;
+ public readonly string Name;
+
+ public RangedSymbol(ulong start, ulong end, ulong elemSize, string name)
+ {
+ Start = start;
+ End = end;
+ ElementSize = elemSize;
+ Name = name;
+ }
+ }
+
+ private static readonly ConcurrentDictionary<ulong, string> _symbols;
+ private static readonly List<RangedSymbol> _rangedSymbols;
+
+ static Symbols()
+ {
+ _symbols = new ConcurrentDictionary<ulong, string>();
+ _rangedSymbols = new List<RangedSymbol>();
+ }
+
+ public static string Get(ulong address)
+ {
+ string result;
+
+ if (_symbols.TryGetValue(address, out result))
+ {
+ return result;
+ }
+
+ lock (_rangedSymbols)
+ {
+ foreach (RangedSymbol symbol in _rangedSymbols)
+ {
+ if (address >= symbol.Start && address <= symbol.End)
+ {
+ ulong diff = address - symbol.Start;
+ ulong rem = diff % symbol.ElementSize;
+
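+                        // Name the address "<symbol>_<element index>", appending
+                        // "+<offset>" when it is not element aligned.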
+ result = symbol.Name + "_" + diff / symbol.ElementSize;
+
+ if (rem != 0)
+ {
+ result += "+" + rem;
+ }
+
+ _symbols.TryAdd(address, result);
+
+ return result;
+ }
+ }
+ }
+
+ return null;
+ }
+
+ [Conditional("M_DEBUG")]
+ public static void Add(ulong address, string name)
+ {
+ _symbols.TryAdd(address, name);
+ }
+
+ [Conditional("M_DEBUG")]
+ public static void Add(ulong address, ulong size, ulong elemSize, string name)
+ {
+ lock (_rangedSymbols)
+ {
+ _rangedSymbols.Add(new RangedSymbol(address, address + size, elemSize, name));
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Diagnostics/TranslatorEventSource.cs b/src/ARMeilleure/Diagnostics/TranslatorEventSource.cs
new file mode 100644
index 00000000..a4f17844
--- /dev/null
+++ b/src/ARMeilleure/Diagnostics/TranslatorEventSource.cs
@@ -0,0 +1,67 @@
+using System.Diagnostics.Tracing;
+using System.Threading;
+
+namespace ARMeilleure.Diagnostics
+{
+ [EventSource(Name = "ARMeilleure")]
+ class TranslatorEventSource : EventSource
+ {
+ public static readonly TranslatorEventSource Log = new();
+
+ private int _rejitQueue;
+ private ulong _funcTabSize;
+ private ulong _funcTabLeafSize;
+ private PollingCounter _rejitQueueCounter;
+ private PollingCounter _funcTabSizeCounter;
+ private PollingCounter _funcTabLeafSizeCounter;
+
+ public TranslatorEventSource()
+ {
+ _rejitQueueCounter = new PollingCounter("rejit-queue-length", this, () => _rejitQueue)
+ {
+ DisplayName = "Rejit Queue Length"
+ };
+
+ _funcTabSizeCounter = new PollingCounter("addr-tab-alloc", this, () => _funcTabSize / 1024d / 1024d)
+ {
+ DisplayName = "AddressTable Total Bytes Allocated",
+ DisplayUnits = "MiB"
+ };
+
+ _funcTabLeafSizeCounter = new PollingCounter("addr-tab-leaf-alloc", this, () => _funcTabLeafSize / 1024d / 1024d)
+ {
+ DisplayName = "AddressTable Total Leaf Bytes Allocated",
+ DisplayUnits = "MiB"
+ };
+ }
+
+ public void RejitQueueAdd(int count)
+ {
+ Interlocked.Add(ref _rejitQueue, count);
+ }
+
+ public void AddressTableAllocated(int bytes, bool leaf)
+ {
+ _funcTabSize += (uint)bytes;
+
+ if (leaf)
+ {
+ _funcTabLeafSize += (uint)bytes;
+ }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ _rejitQueueCounter.Dispose();
+ _rejitQueueCounter = null;
+
+ _funcTabLeafSizeCounter.Dispose();
+ _funcTabLeafSizeCounter = null;
+
+ _funcTabSizeCounter.Dispose();
+ _funcTabSizeCounter = null;
+
+ base.Dispose(disposing);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/CryptoHelper.cs b/src/ARMeilleure/Instructions/CryptoHelper.cs
new file mode 100644
index 00000000..e517c75d
--- /dev/null
+++ b/src/ARMeilleure/Instructions/CryptoHelper.cs
@@ -0,0 +1,280 @@
+// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
+
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class CryptoHelper
+ {
+#region "LookUp Tables"
+ private static ReadOnlySpan<byte> _sBox => new byte[]
+ {
+ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+ };
+
+ private static ReadOnlySpan<byte> _invSBox => new byte[]
+ {
+ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+ };
+
+ private static ReadOnlySpan<byte> _gfMul02 => new byte[]
+ {
+ 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
+ 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
+ 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
+ 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
+ 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
+ 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
+ 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
+ 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
+ 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
+ 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,
+ 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,
+ 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
+ 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,
+ 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,
+ 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
+ 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5
+ };
+
+ private static ReadOnlySpan<byte> _gfMul03 => new byte[]
+ {
+ 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
+ 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
+ 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
+ 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
+ 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
+ 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
+ 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
+ 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
+ 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
+ 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,
+ 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,
+ 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
+ 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,
+ 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,
+ 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
+ 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a
+ };
+
+ private static ReadOnlySpan<byte> _gfMul09 => new byte[]
+ {
+ 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
+ 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
+ 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
+ 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,
+ 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,
+ 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
+ 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,
+ 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,
+ 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
+ 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,
+ 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
+ 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
+ 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,
+ 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,
+ 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
+ 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46
+ };
+
+ private static ReadOnlySpan<byte> _gfMul0B => new byte[]
+ {
+ 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
+ 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
+ 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
+ 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,
+ 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,
+ 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
+ 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,
+ 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,
+ 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
+ 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,
+ 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,
+ 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
+ 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
+ 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
+ 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
+ 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3
+ };
+
+ private static ReadOnlySpan<byte> _gfMul0D => new byte[]
+ {
+ 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
+ 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
+ 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
+ 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,
+ 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
+ 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
+ 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,
+ 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,
+ 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
+ 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
+ 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,
+ 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
+ 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
+ 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
+ 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
+ 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97
+ };
+
+ private static ReadOnlySpan<byte> _gfMul0E => new byte[]
+ {
+ 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
+ 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
+ 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
+ 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,
+ 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
+ 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
+ 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,
+ 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,
+ 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
+ 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,
+ 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
+ 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
+ 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,
+ 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,
+ 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
+ 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d
+ };
+
+ private static ReadOnlySpan<byte> _srPerm => new byte[]
+ {
+ 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+ };
+
+ private static ReadOnlySpan<byte> _isrPerm => new byte[]
+ {
+ 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+ };
+#endregion
+
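+        // MixColumns and its inverse multiply each column of the AES state by a fixed
+        // matrix over GF(2^8); the _gfMulXX tables precompute multiplication by the
+        // matrix coefficients. The state is laid out column-major: bytes 0-3 form
+        // column 0.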
+ public static V128 AesInvMixColumns(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int columns = 0; columns <= 3; columns++)
+ {
+ int idx = columns << 2;
+
+ byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3]
+ byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3]
+ byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3]
+ byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3]
+
+ outState[idx + 0] = (byte)((uint)_gfMul0E[row0] ^ _gfMul0B[row1] ^ _gfMul0D[row2] ^ _gfMul09[row3]);
+ outState[idx + 1] = (byte)((uint)_gfMul09[row0] ^ _gfMul0E[row1] ^ _gfMul0B[row2] ^ _gfMul0D[row3]);
+ outState[idx + 2] = (byte)((uint)_gfMul0D[row0] ^ _gfMul09[row1] ^ _gfMul0E[row2] ^ _gfMul0B[row3]);
+ outState[idx + 3] = (byte)((uint)_gfMul0B[row0] ^ _gfMul0D[row1] ^ _gfMul09[row2] ^ _gfMul0E[row3]);
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesInvShiftRows(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int idx = 0; idx <= 15; idx++)
+ {
+ outState[_isrPerm[idx]] = inState[idx];
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesInvSubBytes(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int idx = 0; idx <= 15; idx++)
+ {
+ outState[idx] = _invSBox[inState[idx]];
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesMixColumns(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int columns = 0; columns <= 3; columns++)
+ {
+ int idx = columns << 2;
+
+ byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3]
+ byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3]
+ byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3]
+ byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3]
+
+ outState[idx + 0] = (byte)((uint)_gfMul02[row0] ^ _gfMul03[row1] ^ row2 ^ row3);
+ outState[idx + 1] = (byte)((uint)row0 ^ _gfMul02[row1] ^ _gfMul03[row2] ^ row3);
+ outState[idx + 2] = (byte)((uint)row0 ^ row1 ^ _gfMul02[row2] ^ _gfMul03[row3]);
+ outState[idx + 3] = (byte)((uint)_gfMul03[row0] ^ row1 ^ row2 ^ _gfMul02[row3]);
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesShiftRows(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int idx = 0; idx <= 15; idx++)
+ {
+ outState[_srPerm[idx]] = inState[idx];
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesSubBytes(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int idx = 0; idx <= 15; idx++)
+ {
+ outState[idx] = _sBox[inState[idx]];
+ }
+
+ return new V128(outState);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitAlu.cs b/src/ARMeilleure/Instructions/InstEmitAlu.cs
new file mode 100644
index 00000000..e0d10e77
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitAlu.cs
@@ -0,0 +1,400 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Adc(ArmEmitterContext context) => EmitAdc(context, setFlags: false);
+ public static void Adcs(ArmEmitterContext context) => EmitAdc(context, setFlags: true);
+
+ private static void EmitAdc(ArmEmitterContext context, bool setFlags)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.Add(n, m);
+
+ Operand carry = GetFlag(PState.CFlag);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int64)
+ {
+ carry = context.ZeroExtend32(OperandType.I64, carry);
+ }
+
+ d = context.Add(d, carry);
+
+ if (setFlags)
+ {
+ EmitNZFlagsCheck(context, d);
+
+ EmitAdcsCCheck(context, n, d);
+ EmitAddsVCheck(context, n, m, d);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Add(ArmEmitterContext context)
+ {
+ SetAluD(context, context.Add(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Adds(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ context.MarkComparison(n, m);
+
+ Operand d = context.Add(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitAddsCCheck(context, n, d);
+ EmitAddsVCheck(context, n, m, d);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void And(ArmEmitterContext context)
+ {
+ SetAluD(context, context.BitwiseAnd(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Ands(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseAnd(n, m);
+
+ EmitNZFlagsCheck(context, d);
+ EmitCVFlagsClear(context);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Asrv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.ShiftRightSI(GetAluN(context), GetAluMShift(context)));
+ }
+
+ public static void Bic(ArmEmitterContext context) => EmitBic(context, setFlags: false);
+ public static void Bics(ArmEmitterContext context) => EmitBic(context, setFlags: true);
+
+ private static void EmitBic(ArmEmitterContext context, bool setFlags)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseAnd(n, context.BitwiseNot(m));
+
+ if (setFlags)
+ {
+ EmitNZFlagsCheck(context, d);
+ EmitCVFlagsClear(context);
+ }
+
+ SetAluD(context, d, setFlags);
+ }
+
+ public static void Cls(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
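+            // CLS is computed as CLZ((n >> 1) ^ (n & maxPositive)) - 1: XORing adjacent
+            // bits turns the run of leading sign bits into leading zeros, and the top
+            // bit of the XOR is always zero, hence the -1.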
+ Operand nHigh = context.ShiftRightUI(n, Const(1));
+
+ bool is32Bits = op.RegisterSize == RegisterSize.Int32;
+
+ Operand mask = is32Bits ? Const(int.MaxValue) : Const(long.MaxValue);
+
+ Operand nLow = context.BitwiseAnd(n, mask);
+
+ Operand res = context.CountLeadingZeros(context.BitwiseExclusiveOr(nHigh, nLow));
+
+ res = context.Subtract(res, Const(res.Type, 1));
+
+ SetAluDOrZR(context, res);
+ }
+
+ public static void Clz(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ Operand d = context.CountLeadingZeros(n);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Eon(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseExclusiveOr(n, context.BitwiseNot(m));
+
+ SetAluD(context, d);
+ }
+
+ public static void Eor(ArmEmitterContext context)
+ {
+ SetAluD(context, context.BitwiseExclusiveOr(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Extr(ArmEmitterContext context)
+ {
+ OpCodeAluRs op = (OpCodeAluRs)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rm);
+
+ if (op.Shift != 0)
+ {
+ if (op.Rn == op.Rm)
+ {
+ res = context.RotateRight(res, Const(op.Shift));
+ }
+ else
+ {
+ res = context.ShiftRightUI(res, Const(op.Shift));
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ int invShift = op.GetBitsCount() - op.Shift;
+
+ res = context.BitwiseOr(res, context.ShiftLeft(n, Const(invShift)));
+ }
+ }
+
+ SetAluDOrZR(context, res);
+ }
+
+ public static void Lslv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.ShiftLeft(GetAluN(context), GetAluMShift(context)));
+ }
+
+ public static void Lsrv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.ShiftRightUI(GetAluN(context), GetAluMShift(context)));
+ }
+
+ public static void Sbc(ArmEmitterContext context) => EmitSbc(context, setFlags: false);
+ public static void Sbcs(ArmEmitterContext context) => EmitSbc(context, setFlags: true);
+
+ private static void EmitSbc(ArmEmitterContext context, bool setFlags)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.Subtract(n, m);
+
+ Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1));
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int64)
+ {
+ borrow = context.ZeroExtend32(OperandType.I64, borrow);
+ }
+
+ d = context.Subtract(d, borrow);
+
+ if (setFlags)
+ {
+ EmitNZFlagsCheck(context, d);
+
+ EmitSbcsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, d);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Sub(ArmEmitterContext context)
+ {
+ SetAluD(context, context.Subtract(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Subs(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ context.MarkComparison(n, m);
+
+ Operand d = context.Subtract(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitSubsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, d);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Orn(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseOr(n, context.BitwiseNot(m));
+
+ SetAluD(context, d);
+ }
+
+ public static void Orr(ArmEmitterContext context)
+ {
+ SetAluD(context, context.BitwiseOr(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Rbit(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand d;
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ d = EmitReverseBits32Op(context, n);
+ }
+ else
+ {
+ d = EmitReverseBits64Op(context, n);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ private static Operand EmitReverseBits64Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
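+            // Divide and conquer bit reversal: swap adjacent bits, then 2-bit pairs,
+            // nibbles, bytes, halfwords, and finally the two 32-bit halves.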
+ Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaaaaaaaaaaul)), Const(1)),
+ context.ShiftLeft (context.BitwiseAnd(op, Const(0x5555555555555555ul)), Const(1)));
+
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccccccccccul)), Const(2)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x3333333333333333ul)), Const(2)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0f0f0f0f0ul)), Const(4)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x0f0f0f0f0f0f0f0ful)), Const(4)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00ff00ff00ul)), Const(8)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x00ff00ff00ff00fful)), Const(8)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xffff0000ffff0000ul)), Const(16)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x0000ffff0000fffful)), Const(16)));
+
+ return context.BitwiseOr(context.ShiftRightUI(val, Const(32)), context.ShiftLeft(val, Const(32)));
+ }
+
+ public static void Rev16(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand d;
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ d = EmitReverseBytes16_32Op(context, n);
+ }
+ else
+ {
+ d = EmitReverseBytes16_64Op(context, n);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Rev32(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand d;
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ d = context.ByteSwap(n);
+ }
+ else
+ {
+ d = EmitReverseBytes32_64Op(context, n);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ private static Operand EmitReverseBytes32_64Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
+ Operand val = EmitReverseBytes16_64Op(context, op);
+
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xffff0000ffff0000ul)), Const(16)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x0000ffff0000fffful)), Const(16)));
+ }
+
+ public static void Rev64(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ SetAluDOrZR(context, context.ByteSwap(GetIntOrZR(context, op.Rn)));
+ }
+
+ public static void Rorv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.RotateRight(GetAluN(context), GetAluMShift(context)));
+ }
+
+ private static Operand GetAluMShift(ArmEmitterContext context)
+ {
+ IOpCodeAluRs op = (IOpCodeAluRs)context.CurrOp;
+
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Int64)
+ {
+ m = context.ConvertI64ToI32(m);
+ }
+
+ return context.BitwiseAnd(m, Const(context.CurrOp.GetBitsCount() - 1));
+ }
+
+ private static void EmitCVFlagsClear(ArmEmitterContext context)
+ {
+ SetFlag(context, PState.CFlag, Const(0));
+ SetFlag(context, PState.VFlag, Const(0));
+ }
+
+ public static void SetAluD(ArmEmitterContext context, Operand d)
+ {
+ SetAluD(context, d, x31IsZR: false);
+ }
+
+ public static void SetAluDOrZR(ArmEmitterContext context, Operand d)
+ {
+ SetAluD(context, d, x31IsZR: true);
+ }
+
+ public static void SetAluD(ArmEmitterContext context, Operand d, bool x31IsZR)
+ {
+ IOpCodeAlu op = (IOpCodeAlu)context.CurrOp;
+
+ if ((x31IsZR || op is IOpCodeAluRs) && op.Rd == RegisterConsts.ZeroIndex)
+ {
+ return;
+ }
+
+ SetIntOrSP(context, op.Rd, d);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitAlu32.cs b/src/ARMeilleure/Instructions/InstEmitAlu32.cs
new file mode 100644
index 00000000..584ada7e
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitAlu32.cs
@@ -0,0 +1,931 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Add(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Add(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitAddsCCheck(context, n, res);
+ EmitAddsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Adc(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Add(n, m);
+
+ Operand carry = GetFlag(PState.CFlag);
+
+ res = context.Add(res, carry);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitAdcsCCheck(context, n, res);
+ EmitAddsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void And(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseAnd(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Bfc(ArmEmitterContext context)
+ {
+ IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
+
+ Operand d = GetIntA32(context, op.Rd);
+ Operand res = context.BitwiseAnd(d, Const(~op.DestMask));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Bfi(ArmEmitterContext context)
+ {
+ IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand d = GetIntA32(context, op.Rd);
+ Operand part = context.BitwiseAnd(n, Const(op.SourceMask));
+
+ if (op.Lsb != 0)
+ {
+ part = context.ShiftLeft(part, Const(op.Lsb));
+ }
+
+ Operand res = context.BitwiseAnd(d, Const(~op.DestMask));
+ res = context.BitwiseOr(res, context.BitwiseAnd(part, Const(op.DestMask)));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Bic(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseAnd(n, context.BitwiseNot(m));
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Clz(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.CountLeadingZeros(m);
+ EmitAluStore(context, res);
+ }
+
+ public static void Cmp(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(n, m);
+
+ EmitNZFlagsCheck(context, res);
+
+ EmitSubsCCheck(context, n, res);
+ EmitSubsVCheck(context, n, m, res);
+ }
+
+ public static void Cmn(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Add(n, m);
+
+ EmitNZFlagsCheck(context, res);
+
+ EmitAddsCCheck(context, n, res);
+ EmitAddsVCheck(context, n, m, res);
+ }
+
+ public static void Eor(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseExclusiveOr(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mov(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand m = GetAluM(context);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, m);
+ }
+
+ EmitAluStore(context, m);
+ }
+
+ public static void Movt(ArmEmitterContext context)
+ {
+ IOpCode32AluImm16 op = (IOpCode32AluImm16)context.CurrOp;
+
+ Operand d = GetIntA32(context, op.Rd);
+            Operand imm = Const(op.Immediate << 16); // Immediate value as top halfword.
+ Operand res = context.BitwiseAnd(d, Const(0x0000ffff));
+ res = context.BitwiseOr(res, imm);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mul(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.Multiply(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mvn(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseNot(m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Orr(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseOr(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Orn(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseOr(n, context.BitwiseNot(m));
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Pkh(ArmEmitterContext context)
+ {
+ OpCode32AluRsImm op = (OpCode32AluRsImm)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res;
+
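+            // PKHTB is distinguished by an ASR shift type (PKHBT uses LSL): TB takes the
+            // top halfword from Rn and the bottom halfword from the shifted Rm.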
+ bool tbform = op.ShiftType == ShiftType.Asr;
+ if (tbform)
+ {
+ res = context.BitwiseOr(context.BitwiseAnd(n, Const(0xFFFF0000)), context.BitwiseAnd(m, Const(0xFFFF)));
+ }
+ else
+ {
+ res = context.BitwiseOr(context.BitwiseAnd(m, Const(0xFFFF0000)), context.BitwiseAnd(n, Const(0xFFFF)));
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rbit(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = EmitReverseBits32Op(context, m);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rev(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = context.ByteSwap(m);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rev16(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = EmitReverseBytes16_32Op(context, m);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Revsh(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = EmitReverseBytes16_32Op(context, m);
+
+ EmitAluStore(context, context.SignExtend16(OperandType.I32, res));
+ }
+
+ public static void Rsc(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(m, n);
+
+ Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1));
+
+ res = context.Subtract(res, borrow);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSbcsCCheck(context, m, n);
+ EmitSubsVCheck(context, m, n, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rsb(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(m, n);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSubsCCheck(context, m, res);
+ EmitSubsVCheck(context, m, n, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Sadd8(ArmEmitterContext context)
+ {
+ EmitAddSub8(context, add: true, unsigned: false);
+ }
+
+ public static void Sbc(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(n, m);
+
+ Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1));
+
+ res = context.Subtract(res, borrow);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSbcsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Sbfx(ArmEmitterContext context)
+ {
+ IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
+
+ var msb = op.Lsb + op.Msb; // For this instruction the encoded msb field actually holds width - 1, so the real msb is Lsb + Msb.
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand res = context.ShiftRightSI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Sdiv(ArmEmitterContext context)
+ {
+ EmitDiv(context, unsigned: false);
+ }
+
+ public static void Sel(ArmEmitterContext context)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
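+ // Negate turns each 0/1 GE flag into 0x00000000/0xFFFFFFFF, and the zero-extends keep one
+ // byte per flag; the shifts below assemble a per-byte mask that picks Rn or Rm lanes.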
+ Operand ge0 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE0Flag)));
+ Operand ge1 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE1Flag)));
+ Operand ge2 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE2Flag)));
+ Operand ge3 = context.Negate(GetFlag(PState.GE3Flag));
+
+ Operand mask = context.BitwiseOr(ge0, context.ShiftLeft(ge1, Const(8)));
+ mask = context.BitwiseOr(mask, context.ShiftLeft(ge2, Const(16)));
+ mask = context.BitwiseOr(mask, context.ShiftLeft(ge3, Const(24)));
+
+ Operand res = context.BitwiseOr(context.BitwiseAnd(n, mask), context.BitwiseAnd(m, context.BitwiseNot(mask)));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Shadd8(ArmEmitterContext context)
+ {
+ EmitHadd8(context, unsigned: false);
+ }
+
+ public static void Shsub8(ArmEmitterContext context)
+ {
+ EmitHsub8(context, unsigned: false);
+ }
+
+ public static void Ssat(ArmEmitterContext context)
+ {
+ OpCode32Sat op = (OpCode32Sat)context.CurrOp;
+
+ EmitSat(context, -(1 << op.SatImm), (1 << op.SatImm) - 1);
+ }
+
+ public static void Ssat16(ArmEmitterContext context)
+ {
+ OpCode32Sat16 op = (OpCode32Sat16)context.CurrOp;
+
+ EmitSat16(context, -(1 << op.SatImm), (1 << op.SatImm) - 1);
+ }
+
+ public static void Ssub8(ArmEmitterContext context)
+ {
+ EmitAddSub8(context, add: false, unsigned: false);
+ }
+
+ public static void Sub(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSubsCCheck(context, n, res);
+ EmitSubsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Sxtb(ArmEmitterContext context)
+ {
+ EmitSignExtend(context, true, 8);
+ }
+
+ public static void Sxtb16(ArmEmitterContext context)
+ {
+ EmitExtend16(context, true);
+ }
+
+ public static void Sxth(ArmEmitterContext context)
+ {
+ EmitSignExtend(context, true, 16);
+ }
+
+ public static void Teq(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseExclusiveOr(n, m);
+
+ EmitNZFlagsCheck(context, res);
+ }
+
+ public static void Tst(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseAnd(n, m);
+ EmitNZFlagsCheck(context, res);
+ }
+
+ public static void Uadd8(ArmEmitterContext context)
+ {
+ EmitAddSub8(context, add: true, unsigned: true);
+ }
+
+ public static void Ubfx(ArmEmitterContext context)
+ {
+ IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
+
+ var msb = op.Lsb + op.Msb; // For this instruction the encoded msb field actually holds width - 1, so the real msb is Lsb + Msb.
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand res = context.ShiftRightUI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Udiv(ArmEmitterContext context)
+ {
+ EmitDiv(context, unsigned: true);
+ }
+
+ public static void Uhadd8(ArmEmitterContext context)
+ {
+ EmitHadd8(context, unsigned: true);
+ }
+
+ public static void Uhsub8(ArmEmitterContext context)
+ {
+ EmitHsub8(context, unsigned: true);
+ }
+
+ public static void Usat(ArmEmitterContext context)
+ {
+ OpCode32Sat op = (OpCode32Sat)context.CurrOp;
+
+ EmitSat(context, 0, op.SatImm == 32 ? (int)(~0) : (1 << op.SatImm) - 1);
+ }
+
+ public static void Usat16(ArmEmitterContext context)
+ {
+ OpCode32Sat16 op = (OpCode32Sat16)context.CurrOp;
+
+ EmitSat16(context, 0, (1 << op.SatImm) - 1);
+ }
+
+ public static void Usub8(ArmEmitterContext context)
+ {
+ EmitAddSub8(context, add: false, unsigned: true);
+ }
+
+ public static void Uxtb(ArmEmitterContext context)
+ {
+ EmitSignExtend(context, false, 8);
+ }
+
+ public static void Uxtb16(ArmEmitterContext context)
+ {
+ EmitExtend16(context, false);
+ }
+
+ public static void Uxth(ArmEmitterContext context)
+ {
+ EmitSignExtend(context, false, 16);
+ }
+
+ private static void EmitSignExtend(ArmEmitterContext context, bool signed, int bits)
+ {
+ IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp;
+
+ Operand m = GetAluM(context);
+ Operand res;
+
+ if (op.RotateBits == 0)
+ {
+ res = m;
+ }
+ else
+ {
+ Operand rotate = Const(op.RotateBits);
+ res = context.RotateRight(m, rotate);
+ }
+
+ switch (bits)
+ {
+ case 8:
+ res = (signed) ? context.SignExtend8(OperandType.I32, res) : context.ZeroExtend8(OperandType.I32, res);
+ break;
+ case 16:
+ res = (signed) ? context.SignExtend16(OperandType.I32, res) : context.ZeroExtend16(OperandType.I32, res);
+ break;
+ }
+
+ if (op.Add)
+ {
+ res = context.Add(res, GetAluN(context));
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ private static void EmitExtend16(ArmEmitterContext context, bool signed)
+ {
+ IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp;
+
+ Operand m = GetAluM(context);
+ Operand res;
+
+ if (op.RotateBits == 0)
+ {
+ res = m;
+ }
+ else
+ {
+ Operand rotate = Const(op.RotateBits);
+ res = context.RotateRight(m, rotate);
+ }
+
+ Operand low16, high16;
+ if (signed)
+ {
+ low16 = context.SignExtend8(OperandType.I32, res);
+ high16 = context.SignExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16)));
+ }
+ else
+ {
+ low16 = context.ZeroExtend8(OperandType.I32, res);
+ high16 = context.ZeroExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16)));
+ }
+
+ if (op.Add)
+ {
+ Operand n = GetAluN(context);
+ Operand lowAdd, highAdd;
+ if (signed)
+ {
+ lowAdd = context.SignExtend16(OperandType.I32, n);
+ highAdd = context.SignExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16)));
+ }
+ else
+ {
+ lowAdd = context.ZeroExtend16(OperandType.I32, n);
+ highAdd = context.ZeroExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16)));
+ }
+
+ low16 = context.Add(low16, lowAdd);
+ high16 = context.Add(high16, highAdd);
+ }
+
+ res = context.BitwiseOr(
+ context.ZeroExtend16(OperandType.I32, low16),
+ context.ShiftLeft(context.ZeroExtend16(OperandType.I32, high16), Const(16)));
+
+ EmitAluStore(context, res);
+ }
+
+ private static void EmitDiv(ArmEmitterContext context, bool unsigned)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+ Operand zero = Const(m.Type, 0);
+
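+ // As on AArch64, the A32 divide instructions do not trap: dividing by zero writes zero to Rd.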
+ Operand divisorIsZero = context.ICompareEqual(m, zero);
+
+ Operand lblBadDiv = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBadDiv, divisorIsZero);
+
+ if (!unsigned)
+ {
+ // ARM64 behaviour: If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow).
+ // TODO: tests to ensure A32 works the same
+
+ Operand intMin = Const(int.MinValue);
+ Operand minus1 = Const(-1);
+
+ Operand nIsIntMin = context.ICompareEqual(n, intMin);
+ Operand mIsMinus1 = context.ICompareEqual(m, minus1);
+
+ Operand lblGoodDiv = Label();
+
+ context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1));
+
+ EmitAluStore(context, intMin);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblGoodDiv);
+ }
+
+ Operand res = unsigned
+ ? context.DivideUI(n, m)
+ : context.Divide(n, m);
+
+ EmitAluStore(context, res);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBadDiv);
+
+ EmitAluStore(context, zero);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitAddSub8(ArmEmitterContext context, bool add, bool unsigned)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ Operand res = Const(0);
+
+ for (int byteSel = 0; byteSel < 4; byteSel++)
+ {
+ Operand shift = Const(byteSel * 8);
+
+ Operand nByte = context.ShiftRightUI(n, shift);
+ Operand mByte = context.ShiftRightUI(m, shift);
+
+ nByte = unsigned ? context.ZeroExtend8(OperandType.I32, nByte) : context.SignExtend8(OperandType.I32, nByte);
+ mByte = unsigned ? context.ZeroExtend8(OperandType.I32, mByte) : context.SignExtend8(OperandType.I32, mByte);
+
+ Operand resByte = add ? context.Add(nByte, mByte) : context.Subtract(nByte, mByte);
+
+ res = context.BitwiseOr(res, context.ShiftLeft(context.ZeroExtend8(OperandType.I32, resByte), shift));
+
+ SetFlag(context, PState.GE0Flag + byteSel, unsigned && add
+ ? context.ShiftRightUI(resByte, Const(8))
+ : context.ShiftRightUI(context.BitwiseNot(resByte), Const(31)));
+ }
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ private static void EmitHadd8(ArmEmitterContext context, bool unsigned)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand m = GetIntA32(context, op.Rm);
+ Operand n = GetIntA32(context, op.Rn);
+
+ Operand xor, res, carry;
+
+ // This relies on the equality x+y == ((x&y) << 1) + (x^y).
+ // Note that x^y always contains the LSB of the result.
+ // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
+ // We mask each lane with 0x7F to drop the bit shifted in from above, so a lane's LSB doesn't leak into the lane below it.
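+ // E.g. for one byte lane with x = 7 and y = 5: x&y = 5 and x^y = 2, so
+ // (x&y) + ((x^y)>>1) = 5 + 1 = 6 = (7+5)/2.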
+
+ res = context.BitwiseAnd(m, n);
+ carry = context.BitwiseExclusiveOr(m, n);
+ xor = context.ShiftRightUI(carry, Const(1));
+ xor = context.BitwiseAnd(xor, Const(0x7F7F7F7Fu));
+ res = context.Add(res, xor);
+
+ if (!unsigned)
+ {
+ // Propagates the sign bit from (x^y)>>1 upwards by one.
+ carry = context.BitwiseAnd(carry, Const(0x80808080u));
+ res = context.BitwiseExclusiveOr(res, carry);
+ }
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ private static void EmitHsub8(ArmEmitterContext context, bool unsigned)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand m = GetIntA32(context, op.Rm);
+ Operand n = GetIntA32(context, op.Rn);
+ Operand left, right, carry, res;
+
+ // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
+ // Note that x^y always contains the LSB of the result.
+ // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
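+ // E.g. for one byte lane with x = 5 and y = 7: x^y = 2 and (x^y)&y = 2, so
+ // ((x^y)>>1) - ((x^y)&y) = 1 - 2 = -1 = (5-7)/2.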
+
+ carry = context.BitwiseExclusiveOr(m, n);
+ left = context.ShiftRightUI(carry, Const(1));
+ right = context.BitwiseAnd(carry, m);
+
+ // We must now perform a partitioned subtraction.
+ // We can do this because the minuend lanes only hold 7-bit values.
+ // We set the spare top bit of each lane so the subtraction can borrow from it,
+ // then invert that bit at the end: it tells us whether a borrow actually occurred.
+
+ res = context.BitwiseOr(left, Const(0x80808080));
+ res = context.Subtract(res, right);
+ res = context.BitwiseExclusiveOr(res, Const(0x80808080));
+
+ if (!unsigned)
+ {
+ // We then sign extend the result into this bit.
+ carry = context.BitwiseAnd(carry, Const(0x80808080));
+ res = context.BitwiseExclusiveOr(res, carry);
+ }
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ private static void EmitSat(ArmEmitterContext context, int intMin, int intMax)
+ {
+ OpCode32Sat op = (OpCode32Sat)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+
+ int shift = DecodeImmShift(op.ShiftType, op.Imm5);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl:
+ if (shift == 32)
+ {
+ n = Const(0);
+ }
+ else
+ {
+ n = context.ShiftLeft(n, Const(shift));
+ }
+ break;
+ case ShiftType.Asr:
+ if (shift == 32)
+ {
+ n = context.ShiftRightSI(n, Const(31));
+ }
+ else
+ {
+ n = context.ShiftRightSI(n, Const(shift));
+ }
+ break;
+ }
+
+ Operand lblCheckLtIntMin = Label();
+ Operand lblNoSat = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfFalse(lblCheckLtIntMin, context.ICompareGreater(n, Const(intMax)));
+
+ SetFlag(context, PState.QFlag, Const(1));
+ SetIntA32(context, op.Rd, Const(intMax));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblCheckLtIntMin);
+ context.BranchIfFalse(lblNoSat, context.ICompareLess(n, Const(intMin)));
+
+ SetFlag(context, PState.QFlag, Const(1));
+ SetIntA32(context, op.Rd, Const(intMin));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNoSat);
+
+ SetIntA32(context, op.Rd, n);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitSat16(ArmEmitterContext context, int intMin, int intMax)
+ {
+ OpCode32Sat16 op = (OpCode32Sat16)context.CurrOp;
+
+ void SetD(int part, Operand value)
+ {
+ if (part == 0)
+ {
+ SetIntA32(context, op.Rd, context.ZeroExtend16(OperandType.I32, value));
+ }
+ else
+ {
+ SetIntA32(context, op.Rd, context.BitwiseOr(GetIntA32(context, op.Rd), context.ShiftLeft(value, Const(16))));
+ }
+ }
+
+ Operand n = GetIntA32(context, op.Rn);
+
+ Operand nLow = context.SignExtend16(OperandType.I32, n);
+ Operand nHigh = context.ShiftRightSI(n, Const(16));
+
+ for (int part = 0; part < 2; part++)
+ {
+ Operand nPart = part == 0 ? nLow : nHigh;
+
+ Operand lblCheckLtIntMin = Label();
+ Operand lblNoSat = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfFalse(lblCheckLtIntMin, context.ICompareGreater(nPart, Const(intMax)));
+
+ SetFlag(context, PState.QFlag, Const(1));
+ SetD(part, Const(intMax));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblCheckLtIntMin);
+ context.BranchIfFalse(lblNoSat, context.ICompareLess(nPart, Const(intMin)));
+
+ SetFlag(context, PState.QFlag, Const(1));
+ SetD(part, Const(intMin));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNoSat);
+
+ SetD(part, nPart);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ private static void EmitAluStore(ArmEmitterContext context, Operand value)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ EmitGenericAluStoreA32(context, op.Rd, ShouldSetFlags(context), value);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitAluHelper.cs b/src/ARMeilleure/Instructions/InstEmitAluHelper.cs
new file mode 100644
index 00000000..994878ad
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitAluHelper.cs
@@ -0,0 +1,613 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitAluHelper
+ {
+ public static bool ShouldSetFlags(ArmEmitterContext context)
+ {
+ IOpCode32HasSetFlags op = (IOpCode32HasSetFlags)context.CurrOp;
+
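+ // SetFlags is null for Thumb encodings whose flag behaviour depends on context:
+ // they set flags outside an IT block and leave them untouched inside one.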
+ if (op.SetFlags == null)
+ {
+ return !context.IsInIfThenBlock;
+ }
+
+ return op.SetFlags.Value;
+ }
+
+ public static void EmitNZFlagsCheck(ArmEmitterContext context, Operand d)
+ {
+ SetFlag(context, PState.NFlag, context.ICompareLess (d, Const(d.Type, 0)));
+ SetFlag(context, PState.ZFlag, context.ICompareEqual(d, Const(d.Type, 0)));
+ }
+
+ public static void EmitAdcsCCheck(ArmEmitterContext context, Operand n, Operand d)
+ {
+ // C = (Rd == Rn && CIn) || Rd < Rn
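+ // If Rd == Rn while CIn is set, Rm must have been ~0 and the add wrapped;
+ // Rd < Rn (unsigned) catches every other wrap-around.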
+ Operand cIn = GetFlag(PState.CFlag);
+
+ Operand cOut = context.BitwiseAnd(context.ICompareEqual(d, n), cIn);
+
+ cOut = context.BitwiseOr(cOut, context.ICompareLessUI(d, n));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+
+ public static void EmitAddsCCheck(ArmEmitterContext context, Operand n, Operand d)
+ {
+ // C = Rd < Rn
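+ // With no carry-in, an unsigned add wrapped exactly when the result is below an operand.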
+ SetFlag(context, PState.CFlag, context.ICompareLessUI(d, n));
+ }
+
+ public static void EmitAddsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d)
+ {
+ // V = (Rd ^ Rn) & ~(Rn ^ Rm) < 0
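+ // Signed overflow requires operands of the same sign (~(Rn ^ Rm) has bit 31 set)
+ // and a result whose sign differs from them ((Rd ^ Rn) has bit 31 set).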
+ Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+ vOut = context.BitwiseAnd(vOut, context.BitwiseNot(context.BitwiseExclusiveOr(n, m)));
+
+ vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+ SetFlag(context, PState.VFlag, vOut);
+ }
+
+ public static void EmitSbcsCCheck(ArmEmitterContext context, Operand n, Operand m)
+ {
+ // C = (Rn == Rm && CIn) || Rn > Rm
+ Operand cIn = GetFlag(PState.CFlag);
+
+ Operand cOut = context.BitwiseAnd(context.ICompareEqual(n, m), cIn);
+
+ cOut = context.BitwiseOr(cOut, context.ICompareGreaterUI(n, m));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+
+ public static void EmitSubsCCheck(ArmEmitterContext context, Operand n, Operand m)
+ {
+ // C = Rn >= Rm
+ SetFlag(context, PState.CFlag, context.ICompareGreaterOrEqualUI(n, m));
+ }
+
+ public static void EmitSubsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d)
+ {
+ // V = (Rd ^ Rn) & (Rn ^ Rm) < 0
+ Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+ vOut = context.BitwiseAnd(vOut, context.BitwiseExclusiveOr(n, m));
+
+ vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+ SetFlag(context, PState.VFlag, vOut);
+ }
+
+ public static Operand EmitReverseBits32Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I32);
+
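+ // Classic log-step bit reversal: swap adjacent bits, then 2-bit pairs, then nibbles,
+ // then bytes, and finish by swapping the two halfwords.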
+ Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaau)), Const(1)),
+ context.ShiftLeft(context.BitwiseAnd(op, Const(0x55555555u)), Const(1)));
+
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccu)), Const(2)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x33333333u)), Const(2)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0u)), Const(4)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x0f0f0f0fu)), Const(4)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00u)), Const(8)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x00ff00ffu)), Const(8)));
+
+ return context.BitwiseOr(context.ShiftRightUI(val, Const(16)), context.ShiftLeft(val, Const(16)));
+ }
+
+ public static Operand EmitReverseBytes16_64Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
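+ // Swaps the two bytes inside every 16-bit lane of the value at once (REV16-style).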
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xff00ff00ff00ff00ul)), Const(8)),
+ context.ShiftLeft(context.BitwiseAnd(op, Const(0x00ff00ff00ff00fful)), Const(8)));
+ }
+
+ public static Operand EmitReverseBytes16_32Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I32);
+
+ Operand val = EmitReverseBytes16_64Op(context, context.ZeroExtend32(OperandType.I64, op));
+
+ return context.ConvertI64ToI32(val);
+ }
+
+ private static void EmitAluWritePc(ArmEmitterContext context, Operand value)
+ {
+ Debug.Assert(value.Type == OperandType.I32);
+
+ if (((OpCode32)context.CurrOp).IsThumb)
+ {
+ bool isReturn = IsA32Return(context);
+ if (!isReturn)
+ {
+ context.StoreToContext();
+ }
+
+ InstEmitFlowHelper.EmitVirtualJump(context, value, isReturn);
+ }
+ else
+ {
+ EmitBxWritePc(context, value);
+ }
+ }
+
+ public static void EmitGenericAluStoreA32(ArmEmitterContext context, int rd, bool setFlags, Operand value)
+ {
+ Debug.Assert(value.Type == OperandType.I32);
+
+ if (rd == RegisterAlias.Aarch32Pc)
+ {
+ if (setFlags)
+ {
+ // TODO: Load SPSR etc.
+
+ EmitBxWritePc(context, value);
+ }
+ else
+ {
+ EmitAluWritePc(context, value);
+ }
+ }
+ else
+ {
+ SetIntA32(context, rd, value);
+ }
+ }
+
+ public static Operand GetAluN(ArmEmitterContext context)
+ {
+ if (context.CurrOp is IOpCodeAlu op)
+ {
+ if (op.DataOp == DataOp.Logical || op is IOpCodeAluRs)
+ {
+ return GetIntOrZR(context, op.Rn);
+ }
+ else
+ {
+ return GetIntOrSP(context, op.Rn);
+ }
+ }
+ else if (context.CurrOp is IOpCode32Alu op32)
+ {
+ return GetIntA32(context, op32.Rn);
+ }
+ else
+ {
+ throw InvalidOpCodeType(context.CurrOp);
+ }
+ }
+
+ public static Operand GetAluM(ArmEmitterContext context, bool setCarry = true)
+ {
+ switch (context.CurrOp)
+ {
+ // ARM32.
+ case IOpCode32AluImm op:
+ {
+ if (ShouldSetFlags(context) && op.IsRotated && setCarry)
+ {
+ SetFlag(context, PState.CFlag, Const((uint)op.Immediate >> 31));
+ }
+
+ return Const(op.Immediate);
+ }
+
+ case IOpCode32AluImm16 op: return Const(op.Immediate);
+
+ case IOpCode32AluRsImm op: return GetMShiftedByImmediate(context, op, setCarry);
+ case IOpCode32AluRsReg op: return GetMShiftedByReg(context, op, setCarry);
+
+ case IOpCode32AluReg op: return GetIntA32(context, op.Rm);
+
+ // ARM64.
+ case IOpCodeAluImm op:
+ {
+ if (op.GetOperandType() == OperandType.I32)
+ {
+ return Const((int)op.Immediate);
+ }
+ else
+ {
+ return Const(op.Immediate);
+ }
+ }
+
+ case IOpCodeAluRs op:
+ {
+ Operand value = GetIntOrZR(context, op.Rm);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl: value = context.ShiftLeft (value, Const(op.Shift)); break;
+ case ShiftType.Lsr: value = context.ShiftRightUI(value, Const(op.Shift)); break;
+ case ShiftType.Asr: value = context.ShiftRightSI(value, Const(op.Shift)); break;
+ case ShiftType.Ror: value = context.RotateRight (value, Const(op.Shift)); break;
+ }
+
+ return value;
+ }
+
+ case IOpCodeAluRx op:
+ {
+ Operand value = GetExtendedM(context, op.Rm, op.IntType);
+
+ value = context.ShiftLeft(value, Const(op.Shift));
+
+ return value;
+ }
+
+ default: throw InvalidOpCodeType(context.CurrOp);
+ }
+ }
+
+ private static Exception InvalidOpCodeType(OpCode opCode)
+ {
+ return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\".");
+ }
+
+ // ARM32 helpers.
+ public static Operand GetMShiftedByImmediate(ArmEmitterContext context, IOpCode32AluRsImm op, bool setCarry)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+
+ int shift = op.Immediate;
+
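+ // In the A32 encoding a shift immediate of 0 is special-cased:
+ // LSR #0 and ASR #0 mean a shift by 32, and ROR #0 encodes RRX.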
+ if (shift == 0)
+ {
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsr: shift = 32; break;
+ case ShiftType.Asr: shift = 32; break;
+ case ShiftType.Ror: shift = 1; break;
+ }
+ }
+
+ if (shift != 0)
+ {
+ setCarry &= ShouldSetFlags(context);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl: m = GetLslC(context, m, setCarry, shift); break;
+ case ShiftType.Lsr: m = GetLsrC(context, m, setCarry, shift); break;
+ case ShiftType.Asr: m = GetAsrC(context, m, setCarry, shift); break;
+ case ShiftType.Ror:
+ if (op.Immediate != 0)
+ {
+ m = GetRorC(context, m, setCarry, shift);
+ }
+ else
+ {
+ m = GetRrxC(context, m, setCarry);
+ }
+ break;
+ }
+ }
+
+ return m;
+ }
+
+ public static int DecodeImmShift(ShiftType shiftType, int shift)
+ {
+ if (shift == 0)
+ {
+ switch (shiftType)
+ {
+ case ShiftType.Lsr: shift = 32; break;
+ case ShiftType.Asr: shift = 32; break;
+ case ShiftType.Ror: shift = 1; break;
+ }
+ }
+
+ return shift;
+ }
+
+ public static Operand GetMShiftedByReg(ArmEmitterContext context, IOpCode32AluRsReg op, bool setCarry)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+ Operand s = context.ZeroExtend8(OperandType.I32, GetIntA32(context, op.Rs));
+ Operand shiftIsZero = context.ICompareEqual(s, Const(0));
+
+ Operand zeroResult = m;
+ Operand shiftResult = m;
+
+ setCarry &= ShouldSetFlags(context);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl: shiftResult = EmitLslC(context, m, setCarry, s, shiftIsZero); break;
+ case ShiftType.Lsr: shiftResult = EmitLsrC(context, m, setCarry, s, shiftIsZero); break;
+ case ShiftType.Asr: shiftResult = EmitAsrC(context, m, setCarry, s, shiftIsZero); break;
+ case ShiftType.Ror: shiftResult = EmitRorC(context, m, setCarry, s, shiftIsZero); break;
+ }
+
+ return context.ConditionalSelect(shiftIsZero, zeroResult, shiftResult);
+ }
+
+ public static void EmitIfHelper(ArmEmitterContext context, Operand boolValue, Action action, bool expected = true)
+ {
+ Debug.Assert(boolValue.Type == OperandType.I32);
+
+ Operand endLabel = Label();
+
+ if (expected)
+ {
+ context.BranchIfFalse(endLabel, boolValue);
+ }
+ else
+ {
+ context.BranchIfTrue(endLabel, boolValue);
+ }
+
+ action();
+
+ context.MarkLabel(endLabel);
+ }
+
+ public static Operand EmitLslC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero)
+ {
+ Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32);
+
+ Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32));
+ Operand result = context.ShiftLeft(m, shift);
+ if (setCarry)
+ {
+ EmitIfHelper(context, shiftIsZero, () =>
+ {
+ Operand cOut = context.ShiftRightUI(m, context.Subtract(Const(32), shift));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+ cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut);
+
+ SetFlag(context, PState.CFlag, cOut);
+ }, false);
+ }
+
+ return context.ConditionalSelect(shiftLarge, Const(0), result);
+ }
+
+ public static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ if ((uint)shift > 32)
+ {
+ return GetShiftByMoreThan32(context, setCarry);
+ }
+ else if (shift == 32)
+ {
+ if (setCarry)
+ {
+ SetCarryMLsb(context, m);
+ }
+
+ return Const(0);
+ }
+ else
+ {
+ if (setCarry)
+ {
+ Operand cOut = context.ShiftRightUI(m, Const(32 - shift));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+
+ return context.ShiftLeft(m, Const(shift));
+ }
+ }
+
+ public static Operand EmitLsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero)
+ {
+ Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32);
+
+ Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32));
+ Operand result = context.ShiftRightUI(m, shift);
+ if (setCarry)
+ {
+ EmitIfHelper(context, shiftIsZero, () =>
+ {
+ Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1)));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+ cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut);
+
+ SetFlag(context, PState.CFlag, cOut);
+ }, false);
+ }
+
+ return context.ConditionalSelect(shiftLarge, Const(0), result);
+ }
+
+ public static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ if ((uint)shift > 32)
+ {
+ return GetShiftByMoreThan32(context, setCarry);
+ }
+ else if (shift == 32)
+ {
+ if (setCarry)
+ {
+ SetCarryMMsb(context, m);
+ }
+
+ return Const(0);
+ }
+ else
+ {
+ if (setCarry)
+ {
+ SetCarryMShrOut(context, m, shift);
+ }
+
+ return context.ShiftRightUI(m, Const(shift));
+ }
+ }
+
+ private static Operand GetShiftByMoreThan32(ArmEmitterContext context, bool setCarry)
+ {
+ if (setCarry)
+ {
+ SetFlag(context, PState.CFlag, Const(0));
+ }
+
+ return Const(0);
+ }
+
+ public static Operand EmitAsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero)
+ {
+ Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32);
+
+ Operand l32Result;
+ Operand ge32Result;
+
+ Operand less32 = context.ICompareLess(shift, Const(32));
+
+ ge32Result = context.ShiftRightSI(m, Const(31));
+
+ if (setCarry)
+ {
+ EmitIfHelper(context, context.BitwiseOr(less32, shiftIsZero), () =>
+ {
+ SetCarryMLsb(context, ge32Result);
+ }, false);
+ }
+
+ l32Result = context.ShiftRightSI(m, shift);
+ if (setCarry)
+ {
+ EmitIfHelper(context, context.BitwiseAnd(less32, context.BitwiseNot(shiftIsZero)), () =>
+ {
+ Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1)));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+
+ SetFlag(context, PState.CFlag, cOut);
+ });
+ }
+
+ return context.ConditionalSelect(less32, l32Result, ge32Result);
+ }
+
+ public static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ if ((uint)shift >= 32)
+ {
+ m = context.ShiftRightSI(m, Const(31));
+
+ if (setCarry)
+ {
+ SetCarryMLsb(context, m);
+ }
+
+ return m;
+ }
+ else
+ {
+ if (setCarry)
+ {
+ SetCarryMShrOut(context, m, shift);
+ }
+
+ return context.ShiftRightSI(m, Const(shift));
+ }
+ }
+
+ public static Operand EmitRorC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero)
+ {
+ Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32);
+
+ shift = context.BitwiseAnd(shift, Const(0x1f));
+ m = context.RotateRight(m, shift);
+
+ if (setCarry)
+ {
+ EmitIfHelper(context, shiftIsZero, () =>
+ {
+ SetCarryMMsb(context, m);
+ }, false);
+ }
+
+ return m;
+ }
+
+ public static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ shift &= 0x1f;
+
+ m = context.RotateRight(m, Const(shift));
+
+ if (setCarry)
+ {
+ SetCarryMMsb(context, m);
+ }
+
+ return m;
+ }
+
+ public static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ // Rotate right by 1 with carry.
+ Operand cIn = context.Copy(GetFlag(PState.CFlag));
+
+ if (setCarry)
+ {
+ SetCarryMLsb(context, m);
+ }
+
+ m = context.ShiftRightUI(m, Const(1));
+
+ m = context.BitwiseOr(m, context.ShiftLeft(cIn, Const(31)));
+
+ return m;
+ }
+
+ private static void SetCarryMLsb(ArmEmitterContext context, Operand m)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ SetFlag(context, PState.CFlag, context.BitwiseAnd(m, Const(1)));
+ }
+
+ private static void SetCarryMMsb(ArmEmitterContext context, Operand m)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ SetFlag(context, PState.CFlag, context.ShiftRightUI(m, Const(31)));
+ }
+
+ private static void SetCarryMShrOut(ArmEmitterContext context, Operand m, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ Operand cOut = context.ShiftRightUI(m, Const(shift - 1));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitBfm.cs b/src/ARMeilleure/Instructions/InstEmitBfm.cs
new file mode 100644
index 00000000..46a7dddd
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitBfm.cs
@@ -0,0 +1,196 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Bfm(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand d = GetIntOrZR(context, op.Rd);
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ Operand res;
+
+ if (op.Pos < op.Shift)
+ {
+ // BFI.
+ int shift = op.GetBitsCount() - op.Shift;
+
+ int width = op.Pos + 1;
+
+ long mask = (long)(ulong.MaxValue >> (64 - width));
+
+ res = context.ShiftLeft(context.BitwiseAnd(n, Const(n.Type, mask)), Const(shift));
+
+ res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~(mask << shift))));
+ }
+ else
+ {
+ // BFXIL.
+ int shift = op.Shift;
+
+ int width = op.Pos - shift + 1;
+
+ long mask = (long)(ulong.MaxValue >> (64 - width));
+
+ res = context.BitwiseAnd(context.ShiftRightUI(n, Const(shift)), Const(n.Type, mask));
+
+ res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~mask)));
+ }
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Sbfm(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ int bitsCount = op.GetBitsCount();
+
+ if (op.Pos + 1 == bitsCount)
+ {
+ EmitSbfmShift(context);
+ }
+ else if (op.Pos < op.Shift)
+ {
+ EmitSbfiz(context);
+ }
+ else if (op.Pos == 7 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.SignExtend8(n.Type, n));
+ }
+ else if (op.Pos == 15 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.SignExtend16(n.Type, n));
+ }
+ else if (op.Pos == 31 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.SignExtend32(n.Type, n));
+ }
+ else
+ {
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = context.ShiftLeft (res, Const(bitsCount - 1 - op.Pos));
+ res = context.ShiftRightSI(res, Const(bitsCount - 1));
+ res = context.BitwiseAnd (res, Const(res.Type, ~op.TMask));
+
+ Operand n2 = GetBfmN(context);
+
+ SetIntOrZR(context, op.Rd, context.BitwiseOr(res, n2));
+ }
+ }
+
+ public static void Ubfm(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ if (op.Pos + 1 == op.GetBitsCount())
+ {
+ EmitUbfmShift(context);
+ }
+ else if (op.Pos < op.Shift)
+ {
+ EmitUbfiz(context);
+ }
+ else if (op.Pos + 1 == op.Shift)
+ {
+ EmitBfmLsl(context);
+ }
+ else if (op.Pos == 7 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xff)));
+ }
+ else if (op.Pos == 15 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xffff)));
+ }
+ else
+ {
+ SetIntOrZR(context, op.Rd, GetBfmN(context));
+ }
+ }
+
+ private static void EmitSbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: true);
+ private static void EmitUbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: false);
+
+ private static void EmitBfiz(ArmEmitterContext context, bool signed)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ int width = op.Pos + 1;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = context.ShiftLeft(res, Const(op.GetBitsCount() - width));
+
+ res = signed
+ ? context.ShiftRightSI(res, Const(op.Shift - width))
+ : context.ShiftRightUI(res, Const(op.Shift - width));
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitSbfmShift(ArmEmitterContext context)
+ {
+ EmitBfmShift(context, signed: true);
+ }
+
+ private static void EmitUbfmShift(ArmEmitterContext context)
+ {
+ EmitBfmShift(context, signed: false);
+ }
+
+ private static void EmitBfmShift(ArmEmitterContext context, bool signed)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = signed
+ ? context.ShiftRightSI(res, Const(op.Shift))
+ : context.ShiftRightUI(res, Const(op.Shift));
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitBfmLsl(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ int shift = op.GetBitsCount() - op.Shift;
+
+ SetIntOrZR(context, op.Rd, context.ShiftLeft(res, Const(shift)));
+ }
+
+ private static Operand GetBfmN(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ long mask = op.WMask & op.TMask;
+
+ return context.BitwiseAnd(context.RotateRight(res, Const(op.Shift)), Const(res.Type, mask));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitCcmp.cs b/src/ARMeilleure/Instructions/InstEmitCcmp.cs
new file mode 100644
index 00000000..7f0beb6c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitCcmp.cs
@@ -0,0 +1,61 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Ccmn(ArmEmitterContext context) => EmitCcmp(context, isNegated: true);
+ public static void Ccmp(ArmEmitterContext context) => EmitCcmp(context, isNegated: false);
+
+ private static void EmitCcmp(ArmEmitterContext context, bool isNegated)
+ {
+ OpCodeCcmp op = (OpCodeCcmp)context.CurrOp;
+
+ Operand lblTrue = Label();
+ Operand lblEnd = Label();
+
+ EmitCondBranch(context, lblTrue, op.Cond);
+
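+ // Condition failed: take NZCV directly from the immediate and skip the compare below.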
+ SetFlag(context, PState.VFlag, Const((op.Nzcv >> 0) & 1));
+ SetFlag(context, PState.CFlag, Const((op.Nzcv >> 1) & 1));
+ SetFlag(context, PState.ZFlag, Const((op.Nzcv >> 2) & 1));
+ SetFlag(context, PState.NFlag, Const((op.Nzcv >> 3) & 1));
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblTrue);
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ if (isNegated)
+ {
+ Operand d = context.Add(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitAddsCCheck(context, n, d);
+ EmitAddsVCheck(context, n, m, d);
+ }
+ else
+ {
+ Operand d = context.Subtract(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitSubsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, d);
+ }
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitCsel.cs b/src/ARMeilleure/Instructions/InstEmitCsel.cs
new file mode 100644
index 00000000..926b9a9e
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitCsel.cs
@@ -0,0 +1,53 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ private enum CselOperation
+ {
+ None,
+ Increment,
+ Invert,
+ Negate
+ }
+
+ public static void Csel(ArmEmitterContext context) => EmitCsel(context, CselOperation.None);
+ public static void Csinc(ArmEmitterContext context) => EmitCsel(context, CselOperation.Increment);
+ public static void Csinv(ArmEmitterContext context) => EmitCsel(context, CselOperation.Invert);
+ public static void Csneg(ArmEmitterContext context) => EmitCsel(context, CselOperation.Negate);
+
+ private static void EmitCsel(ArmEmitterContext context, CselOperation cselOp)
+ {
+ OpCodeCsel op = (OpCodeCsel)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ if (cselOp == CselOperation.Increment)
+ {
+ m = context.Add(m, Const(m.Type, 1));
+ }
+ else if (cselOp == CselOperation.Invert)
+ {
+ m = context.BitwiseNot(m);
+ }
+ else if (cselOp == CselOperation.Negate)
+ {
+ m = context.Negate(m);
+ }
+
+ Operand condTrue = GetCondTrue(context, op.Cond);
+
+ Operand d = context.ConditionalSelect(condTrue, n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitDiv.cs b/src/ARMeilleure/Instructions/InstEmitDiv.cs
new file mode 100644
index 00000000..39a5c32e
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitDiv.cs
@@ -0,0 +1,67 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Sdiv(ArmEmitterContext context) => EmitDiv(context, unsigned: false);
+ public static void Udiv(ArmEmitterContext context) => EmitDiv(context, unsigned: true);
+
+ private static void EmitDiv(ArmEmitterContext context, bool unsigned)
+ {
+ OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;
+
+ // If Rm == 0, Rd = 0 (division by zero).
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand divisorIsZero = context.ICompareEqual(m, Const(m.Type, 0));
+
+ Operand lblBadDiv = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBadDiv, divisorIsZero);
+
+ if (!unsigned)
+ {
+ // If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow).
+ bool is32Bits = op.RegisterSize == RegisterSize.Int32;
+
+ Operand intMin = is32Bits ? Const(int.MinValue) : Const(long.MinValue);
+ Operand minus1 = is32Bits ? Const(-1) : Const(-1L);
+
+ Operand nIsIntMin = context.ICompareEqual(n, intMin);
+ Operand mIsMinus1 = context.ICompareEqual(m, minus1);
+
+ Operand lblGoodDiv = Label();
+
+ context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1));
+
+ SetAluDOrZR(context, intMin);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblGoodDiv);
+ }
+
+ Operand d = unsigned
+ ? context.DivideUI(n, m)
+ : context.Divide (n, m);
+
+ SetAluDOrZR(context, d);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBadDiv);
+
+ SetAluDOrZR(context, Const(op.GetOperandType(), 0));
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitException.cs b/src/ARMeilleure/Instructions/InstEmitException.cs
new file mode 100644
index 00000000..0baaa87d
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitException.cs
@@ -0,0 +1,55 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Brk(ArmEmitterContext context)
+ {
+ OpCodeException op = (OpCodeException)context.CurrOp;
+
+ string name = nameof(NativeInterface.Break);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ context.Return(Const(op.Address));
+ }
+
+ public static void Svc(ArmEmitterContext context)
+ {
+ OpCodeException op = (OpCodeException)context.CurrOp;
+
+ string name = nameof(NativeInterface.SupervisorCall);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ Translator.EmitSynchronization(context);
+ }
+
+ public static void Und(ArmEmitterContext context)
+ {
+ OpCode op = context.CurrOp;
+
+ string name = nameof(NativeInterface.Undefined);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.RawOpCode));
+
+ context.LoadFromContext();
+
+ context.Return(Const(op.Address));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitException32.cs b/src/ARMeilleure/Instructions/InstEmitException32.cs
new file mode 100644
index 00000000..ec0c32bf
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitException32.cs
@@ -0,0 +1,39 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Translation;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Svc(ArmEmitterContext context)
+ {
+ IOpCode32Exception op = (IOpCode32Exception)context.CurrOp;
+
+ string name = nameof(NativeInterface.SupervisorCall);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(((IOpCode)op).Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ Translator.EmitSynchronization(context);
+ }
+
+ public static void Trap(ArmEmitterContext context)
+ {
+ IOpCode32Exception op = (IOpCode32Exception)context.CurrOp;
+
+ string name = nameof(NativeInterface.Break);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(((IOpCode)op).Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ context.Return(Const(context.CurrOp.Address));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitFlow.cs b/src/ARMeilleure/Instructions/InstEmitFlow.cs
new file mode 100644
index 00000000..c40eb55c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitFlow.cs
@@ -0,0 +1,107 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void B(ArmEmitterContext context)
+ {
+ OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;
+
+ context.Branch(context.GetLabel((ulong)op.Immediate));
+ }
+
+ public static void B_Cond(ArmEmitterContext context)
+ {
+ OpCodeBImmCond op = (OpCodeBImmCond)context.CurrOp;
+
+ EmitBranch(context, op.Cond);
+ }
+
+ public static void Bl(ArmEmitterContext context)
+ {
+ OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;
+
+ context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4));
+
+ EmitCall(context, (ulong)op.Immediate);
+ }
+
+ public static void Blr(ArmEmitterContext context)
+ {
+ OpCodeBReg op = (OpCodeBReg)context.CurrOp;
+
+ Operand n = context.Copy(GetIntOrZR(context, op.Rn));
+
+ context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4));
+
+ EmitVirtualCall(context, n);
+ }
+
+ public static void Br(ArmEmitterContext context)
+ {
+ OpCodeBReg op = (OpCodeBReg)context.CurrOp;
+
+ EmitVirtualJump(context, GetIntOrZR(context, op.Rn), op.Rn == RegisterAlias.Lr);
+ }
+
+ public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true);
+ public static void Cbz(ArmEmitterContext context) => EmitCb(context, onNotZero: false);
+
+ private static void EmitCb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeBImmCmp op = (OpCodeBImmCmp)context.CurrOp;
+
+ EmitBranch(context, GetIntOrZR(context, op.Rt), onNotZero);
+ }
+
+ public static void Ret(ArmEmitterContext context)
+ {
+ OpCodeBReg op = (OpCodeBReg)context.CurrOp;
+
+ context.Return(GetIntOrZR(context, op.Rn));
+ }
+
+ public static void Tbnz(ArmEmitterContext context) => EmitTb(context, onNotZero: true);
+ public static void Tbz(ArmEmitterContext context) => EmitTb(context, onNotZero: false);
+
+ private static void EmitTb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeBImmTest op = (OpCodeBImmTest)context.CurrOp;
+
+ Operand value = context.BitwiseAnd(GetIntOrZR(context, op.Rt), Const(1L << op.Bit));
+
+ EmitBranch(context, value, onNotZero);
+ }
+
+ private static void EmitBranch(ArmEmitterContext context, Condition cond)
+ {
+ OpCodeBImm op = (OpCodeBImm)context.CurrOp;
+
+ EmitCondBranch(context, context.GetLabel((ulong)op.Immediate), cond);
+ }
+
+ private static void EmitBranch(ArmEmitterContext context, Operand value, bool onNotZero)
+ {
+ OpCodeBImm op = (OpCodeBImm)context.CurrOp;
+
+ Operand lblTarget = context.GetLabel((ulong)op.Immediate);
+
+ if (onNotZero)
+ {
+ context.BranchIfTrue(lblTarget, value);
+ }
+ else
+ {
+ context.BranchIfFalse(lblTarget, value);
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitFlow32.cs b/src/ARMeilleure/Instructions/InstEmitFlow32.cs
new file mode 100644
index 00000000..3a7707ee
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitFlow32.cs
@@ -0,0 +1,136 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void B(ArmEmitterContext context)
+ {
+ IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
+
+ context.Branch(context.GetLabel((ulong)op.Immediate));
+ }
+
+ public static void Bl(ArmEmitterContext context)
+ {
+ Blx(context, x: false);
+ }
+
+ public static void Blx(ArmEmitterContext context)
+ {
+ Blx(context, x: true);
+ }
+
+ private static void Blx(ArmEmitterContext context, bool x)
+ {
+ IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
+
+ uint pc = op.GetPc();
+
+ bool isThumb = ((OpCode32)context.CurrOp).IsThumb;
+
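+ // op.GetPc() reads ahead of the instruction (+4 in Thumb state, +8 in Arm state), so the
+ // link value below lands on the next instruction; bit 0 of LR records the Thumb state.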
+ uint currentPc = isThumb
+ ? pc | 1
+ : pc - 4;
+
+ SetIntA32(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc));
+
+ // If x is true, then this is a branch with link and exchange.
+ // In this case we need to swap the mode between Arm <-> Thumb.
+ if (x)
+ {
+ SetFlag(context, PState.TFlag, Const(isThumb ? 0 : 1));
+ }
+
+ EmitCall(context, (ulong)op.Immediate);
+ }
+
+ public static void Blxr(ArmEmitterContext context)
+ {
+ IOpCode32BReg op = (IOpCode32BReg)context.CurrOp;
+
+ uint pc = op.GetPc();
+
+ Operand addr = context.Copy(GetIntA32(context, op.Rm));
+ Operand bitOne = context.BitwiseAnd(addr, Const(1));
+
+ bool isThumb = ((OpCode32)context.CurrOp).IsThumb;
+
+ uint currentPc = isThumb
+ ? (pc - 2) | 1
+ : pc - 4;
+
+ SetIntA32(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc));
+
+ SetFlag(context, PState.TFlag, bitOne);
+
+ EmitBxWritePc(context, addr);
+ }
+
+ public static void Bx(ArmEmitterContext context)
+ {
+ IOpCode32BReg op = (IOpCode32BReg)context.CurrOp;
+
+ EmitBxWritePc(context, GetIntA32(context, op.Rm), op.Rm);
+ }
+
+ public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true);
+ public static void Cbz(ArmEmitterContext context) => EmitCb(context, onNotZero: false);
+
+ private static void EmitCb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeT16BImmCmp op = (OpCodeT16BImmCmp)context.CurrOp;
+
+ Operand value = GetIntA32(context, op.Rn);
+ Operand lblTarget = context.GetLabel((ulong)op.Immediate);
+
+ if (onNotZero)
+ {
+ context.BranchIfTrue(lblTarget, value);
+ }
+ else
+ {
+ context.BranchIfFalse(lblTarget, value);
+ }
+ }
+
+ public static void It(ArmEmitterContext context)
+ {
+ OpCodeT16IfThen op = (OpCodeT16IfThen)context.CurrOp;
+
+ context.SetIfThenBlockState(op.IfThenBlockConds);
+ }
+
+ public static void Tbb(ArmEmitterContext context) => EmitTb(context, halfword: false);
+ public static void Tbh(ArmEmitterContext context) => EmitTb(context, halfword: true);
+
+ private static void EmitTb(ArmEmitterContext context, bool halfword)
+ {
+ OpCodeT32Tb op = (OpCodeT32Tb)context.CurrOp;
+
+ Operand halfwords;
+
+ if (halfword)
+ {
+ Operand address = context.Add(GetIntA32(context, op.Rn), context.ShiftLeft(GetIntA32(context, op.Rm), Const(1)));
+ halfwords = InstEmitMemoryHelper.EmitReadInt(context, address, 1);
+ }
+ else
+ {
+ Operand address = context.Add(GetIntA32(context, op.Rn), GetIntA32(context, op.Rm));
+ halfwords = InstEmitMemoryHelper.EmitReadIntAligned(context, address, 0);
+ }
+
+ Operand targetAddress = context.Add(Const((int)op.GetPc()), context.ShiftLeft(halfwords, Const(1)));
+
+ EmitVirtualJump(context, targetAddress, isReturn: false);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs
new file mode 100644
index 00000000..6ac32908
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs
@@ -0,0 +1,240 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using ARMeilleure.Translation.PTC;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitFlowHelper
+ {
+ public static void EmitCondBranch(ArmEmitterContext context, Operand target, Condition cond)
+ {
+ if (cond != Condition.Al)
+ {
+ context.BranchIfTrue(target, GetCondTrue(context, cond));
+ }
+ else
+ {
+ context.Branch(target);
+ }
+ }
+
+ public static Operand GetCondTrue(ArmEmitterContext context, Condition condition)
+ {
+ Operand cmpResult = context.TryGetComparisonResult(condition);
+
+ if (cmpResult != default)
+ {
+ return cmpResult;
+ }
+
+ Operand value = Const(1);
+
+ Operand Inverse(Operand val)
+ {
+ return context.BitwiseExclusiveOr(val, Const(1));
+ }
+
+ switch (condition)
+ {
+ case Condition.Eq:
+ value = GetFlag(PState.ZFlag);
+ break;
+
+ case Condition.Ne:
+ value = Inverse(GetFlag(PState.ZFlag));
+ break;
+
+ case Condition.GeUn:
+ value = GetFlag(PState.CFlag);
+ break;
+
+ case Condition.LtUn:
+ value = Inverse(GetFlag(PState.CFlag));
+ break;
+
+ case Condition.Mi:
+ value = GetFlag(PState.NFlag);
+ break;
+
+ case Condition.Pl:
+ value = Inverse(GetFlag(PState.NFlag));
+ break;
+
+ case Condition.Vs:
+ value = GetFlag(PState.VFlag);
+ break;
+
+ case Condition.Vc:
+ value = Inverse(GetFlag(PState.VFlag));
+ break;
+
+ case Condition.GtUn:
+ {
+ Operand c = GetFlag(PState.CFlag);
+ Operand z = GetFlag(PState.ZFlag);
+
+ value = context.BitwiseAnd(c, Inverse(z));
+
+ break;
+ }
+
+ case Condition.LeUn:
+ {
+ Operand c = GetFlag(PState.CFlag);
+ Operand z = GetFlag(PState.ZFlag);
+
+ value = context.BitwiseOr(Inverse(c), z);
+
+ break;
+ }
+
+ case Condition.Ge:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.ICompareEqual(n, v);
+
+ break;
+ }
+
+ case Condition.Lt:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.ICompareNotEqual(n, v);
+
+ break;
+ }
+
+ case Condition.Gt:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand z = GetFlag(PState.ZFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.BitwiseAnd(Inverse(z), context.ICompareEqual(n, v));
+
+ break;
+ }
+
+ case Condition.Le:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand z = GetFlag(PState.ZFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.BitwiseOr(z, context.ICompareNotEqual(n, v));
+
+ break;
+ }
+ }
+
+ return value;
+ }
+
+ public static void EmitCall(ArmEmitterContext context, ulong immediate)
+ {
+ bool isRecursive = immediate == context.EntryAddress;
+
+ if (isRecursive)
+ {
+ context.Branch(context.GetLabel(immediate));
+ }
+ else
+ {
+ EmitTableBranch(context, Const(immediate), isJump: false);
+ }
+ }
+
+ public static void EmitVirtualCall(ArmEmitterContext context, Operand target)
+ {
+ EmitTableBranch(context, target, isJump: false);
+ }
+
+ public static void EmitVirtualJump(ArmEmitterContext context, Operand target, bool isReturn)
+ {
+ if (isReturn)
+ {
+ if (target.Type == OperandType.I32)
+ {
+ target = context.ZeroExtend32(OperandType.I64, target);
+ }
+
+ context.Return(target);
+ }
+ else
+ {
+ EmitTableBranch(context, target, isJump: true);
+ }
+ }
+
+ private static void EmitTableBranch(ArmEmitterContext context, Operand guestAddress, bool isJump)
+ {
+ context.StoreToContext();
+
+ if (guestAddress.Type == OperandType.I32)
+ {
+ guestAddress = context.ZeroExtend32(OperandType.I64, guestAddress);
+ }
+
+ // Store the target guest address into the native context. The stub uses this address to dispatch into the
+ // next translation.
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+ Operand dispAddressAddr = context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset()));
+ context.Store(dispAddressAddr, guestAddress);
+
+ Operand hostAddress;
+
+ // If the address is mapped onto the function table, we can skip the table walk. Otherwise we fall back
+ // onto the dispatch stub.
+ if (guestAddress.Kind == OperandKind.Constant && context.FunctionTable.IsValid(guestAddress.Value))
+ {
+ Operand hostAddressAddr = !context.HasPtc ?
+ Const(ref context.FunctionTable.GetValue(guestAddress.Value)) :
+ Const(ref context.FunctionTable.GetValue(guestAddress.Value), new Symbol(SymbolType.FunctionTable, guestAddress.Value));
+
+ hostAddress = context.Load(OperandType.I64, hostAddressAddr);
+ }
+ else
+ {
+ hostAddress = !context.HasPtc ?
+ Const((long)context.Stubs.DispatchStub) :
+ Const((long)context.Stubs.DispatchStub, Ptc.DispatchStubSymbol);
+ }
+
+ if (isJump)
+ {
+ context.Tailcall(hostAddress, nativeContext);
+ }
+ else
+ {
+ OpCode op = context.CurrOp;
+
+ Operand returnAddress = context.Call(hostAddress, OperandType.I64, nativeContext);
+
+ context.LoadFromContext();
+
+ // Note: The return value of a translated function is always an Int64 with the address execution has
+ // returned to. We expect this address to be immediately after the current instruction; if it isn't, we
+ // keep returning until we reach the dispatcher.
+ Operand nextAddr = Const((long)op.Address + op.OpCodeSizeInBytes);
+
+ // Try to continue within this block.
+ // If the return address isn't to our next instruction, we need to return so the JIT can figure out
+ // what to do.
+ Operand lblContinue = context.GetLabel(nextAddr.Value);
+ context.BranchIf(lblContinue, returnAddress, nextAddr, Comparison.Equal, BasicBlockFrequency.Cold);
+
+ context.Return(returnAddress);
+ }
+ }
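+
+ // Conceptually, the dispatch stub invoked above behaves roughly like the sketch below
+ // (illustrative only; the real stub is emitted as native code elsewhere, and the helper
+ // name is hypothetical):
+ //
+ // ulong guestAddress = nativeContext.DispatchAddress; // written by EmitTableBranch above
+ // IntPtr hostFunction = LookUpFunctionTableOrTranslate(guestAddress); // hypothetical helper
+ // TailcallTo(hostFunction, nativeContext);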
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitHash.cs b/src/ARMeilleure/Instructions/InstEmitHash.cs
new file mode 100644
index 00000000..82b3e353
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitHash.cs
@@ -0,0 +1,69 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHashHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ private const int ByteSizeLog2 = 0;
+ private const int HWordSizeLog2 = 1;
+ private const int WordSizeLog2 = 2;
+ private const int DWordSizeLog2 = 3;
+
+ public static void Crc32b(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, ByteSizeLog2, false);
+ }
+
+ public static void Crc32h(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, HWordSizeLog2, false);
+ }
+
+ public static void Crc32w(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, WordSizeLog2, false);
+ }
+
+ public static void Crc32x(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, DWordSizeLog2, false);
+ }
+
+ public static void Crc32cb(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, ByteSizeLog2, true);
+ }
+
+ public static void Crc32ch(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, HWordSizeLog2, true);
+ }
+
+ public static void Crc32cw(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, WordSizeLog2, true);
+ }
+
+ public static void Crc32cx(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, DWordSizeLog2, true);
+ }
+
+ private static void EmitCrc32Call(ArmEmitterContext context, int size, bool c)
+ {
+ OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand d = EmitCrc32(context, n, m, size, c);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitHash32.cs b/src/ARMeilleure/Instructions/InstEmitHash32.cs
new file mode 100644
index 00000000..5d39f8af
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitHash32.cs
@@ -0,0 +1,53 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using static ARMeilleure.Instructions.InstEmitHashHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Crc32b(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, ByteSizeLog2, false);
+ }
+
+ public static void Crc32h(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, HWordSizeLog2, false);
+ }
+
+ public static void Crc32w(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, WordSizeLog2, false);
+ }
+
+ public static void Crc32cb(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, ByteSizeLog2, true);
+ }
+
+ public static void Crc32ch(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, HWordSizeLog2, true);
+ }
+
+ public static void Crc32cw(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, WordSizeLog2, true);
+ }
+
+ private static void EmitCrc32Call(ArmEmitterContext context, int size, bool c)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ Operand d = EmitCrc32(context, n, m, size, c);
+
+ EmitAluStore(context, d);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitHashHelper.cs b/src/ARMeilleure/Instructions/InstEmitHashHelper.cs
new file mode 100644
index 00000000..55a03a4f
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitHashHelper.cs
@@ -0,0 +1,118 @@
+// https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitHashHelper
+ {
+ public const uint Crc32RevPoly = 0xedb88320;
+ public const uint Crc32cRevPoly = 0x82f63b78;
+
+ public static Operand EmitCrc32(ArmEmitterContext context, Operand crc, Operand value, int size, bool castagnoli)
+ {
+ Debug.Assert(crc.Type.IsInteger() && value.Type.IsInteger());
+ Debug.Assert(size >= 0 && size < 4);
+ Debug.Assert((size < 3) || (value.Type == OperandType.I64));
+
+ if (castagnoli && Optimizations.UseSse42)
+ {
+ // The CRC32 instruction does not have an immediate variant, so ensure both inputs are in registers.
+ value = (value.Kind == OperandKind.Constant) ? context.Copy(value) : value;
+ crc = (crc.Kind == OperandKind.Constant) ? context.Copy(crc) : crc;
+
+ Intrinsic op = size switch
+ {
+ 0 => Intrinsic.X86Crc32_8,
+ 1 => Intrinsic.X86Crc32_16,
+ _ => Intrinsic.X86Crc32,
+ };
+
+ return (size == 3) ? context.ConvertI64ToI32(context.AddIntrinsicLong(op, crc, value)) : context.AddIntrinsicInt(op, crc, value);
+ }
+ else if (Optimizations.UsePclmulqdq)
+ {
+ return size switch
+ {
+ 3 => EmitCrc32Optimized64(context, crc, value, castagnoli),
+ _ => EmitCrc32Optimized(context, crc, value, castagnoli, size),
+ };
+ }
+ else
+ {
+ string name = (size, castagnoli) switch
+ {
+ (0, false) => nameof(SoftFallback.Crc32b),
+ (1, false) => nameof(SoftFallback.Crc32h),
+ (2, false) => nameof(SoftFallback.Crc32w),
+ (3, false) => nameof(SoftFallback.Crc32x),
+ (0, true) => nameof(SoftFallback.Crc32cb),
+ (1, true) => nameof(SoftFallback.Crc32ch),
+ (2, true) => nameof(SoftFallback.Crc32cw),
+ (3, true) => nameof(SoftFallback.Crc32cx),
+ _ => throw new ArgumentOutOfRangeException(nameof(size))
+ };
+
+ return context.Call(typeof(SoftFallback).GetMethod(name), crc, value);
+ }
+ }
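+
+ // A minimal managed reference for the byte-sized update, useful for sanity-checking the
+ // accelerated paths above. This is a sketch (not part of the emitter) assuming the byte
+ // fallback implements the standard reflected (LSB-first) update with no init/final
+ // inversion, which matches the AArch64 CRC32B definition; Crc32RevPoly/Crc32cRevPoly
+ // above are the reflected polynomials.
+ private static uint Crc32ByteReference(uint crc, byte value, uint revPoly)
+ {
+     crc ^= value;
+
+     for (int bit = 0; bit < 8; bit++)
+     {
+         // Shift one bit out; if the dropped bit was set, fold in the reflected polynomial.
+         crc = ((crc & 1) != 0) ? (crc >> 1) ^ revPoly : (crc >> 1);
+     }
+
+     return crc;
+ }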
+
+ private static Operand EmitCrc32Optimized(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli, int size)
+ {
+ long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))'
+ long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1
+
+ crc = context.VectorInsert(context.VectorZero(), crc, 0);
+
+ switch (size)
+ {
+ case 0: data = context.VectorInsert8(context.VectorZero(), data, 0); break;
+ case 1: data = context.VectorInsert16(context.VectorZero(), data, 0); break;
+ case 2: data = context.VectorInsert(context.VectorZero(), data, 0); break;
+ }
+
+ int bitsize = 8 << size;
+
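+ // Barrett reduction, as described in the Intel paper linked above: shift the
+ // (crc XOR data) value to the top of the low qword, multiply by mu to estimate the
+ // quotient, then multiply by the shifted polynomial to subtract it off. The reduced
+ // CRC lands in bits [64, 96) of the vector, i.e. 32-bit element 2, which the
+ // VectorExtract below picks up.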
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data);
+ tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(64 - bitsize));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(0));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
+
+ if (bitsize < 32)
+ {
+ crc = context.AddIntrinsic(Intrinsic.X86Pslldq, crc, Const((64 - bitsize) / 8));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, crc);
+ }
+
+ return context.VectorExtract(OperandType.I32, tmp, 2);
+ }
+
+ private static Operand EmitCrc32Optimized64(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli)
+ {
+ long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))'
+ long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1
+
+ crc = context.VectorInsert(context.VectorZero(), crc, 0);
+ data = context.VectorInsert(context.VectorZero(), data, 0);
+
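+ // A 64-bit input does not fit a single 32-bit Barrett step, so it is reduced in two
+ // passes (low half, then high half); each pass is the same multiply-by-mu /
+ // multiply-by-polynomial sequence as in EmitCrc32Optimized, and the final CRC is
+ // again read from 32-bit element 2.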
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data);
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pslldq, tmp, Const(4));
+
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, res, X86GetScalar(context, mu), Const(0));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
+
+ tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, res);
+ tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(32));
+
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(1));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
+
+ return context.VectorExtract(OperandType.I32, tmp, 2);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitHelper.cs b/src/ARMeilleure/Instructions/InstEmitHelper.cs
new file mode 100644
index 00000000..a22bb3fb
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitHelper.cs
@@ -0,0 +1,264 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitHelper
+ {
+ public static Operand GetExtendedM(ArmEmitterContext context, int rm, IntType type)
+ {
+ Operand value = GetIntOrZR(context, rm);
+
+ switch (type)
+ {
+ case IntType.UInt8: value = context.ZeroExtend8 (value.Type, value); break;
+ case IntType.UInt16: value = context.ZeroExtend16(value.Type, value); break;
+ case IntType.UInt32: value = context.ZeroExtend32(value.Type, value); break;
+
+ case IntType.Int8: value = context.SignExtend8 (value.Type, value); break;
+ case IntType.Int16: value = context.SignExtend16(value.Type, value); break;
+ case IntType.Int32: value = context.SignExtend32(value.Type, value); break;
+ }
+
+ return value;
+ }
+
+ public static Operand GetIntA32(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex == RegisterAlias.Aarch32Pc)
+ {
+ OpCode32 op = (OpCode32)context.CurrOp;
+
+ return Const((int)op.GetPc());
+ }
+ else
+ {
+ return Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32);
+ }
+ }
+
+ public static Operand GetIntA32AlignedPC(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex == RegisterAlias.Aarch32Pc)
+ {
+ OpCode32 op = (OpCode32)context.CurrOp;
+
+ return Const((int)(op.GetPc() & 0xfffffffc));
+ }
+ else
+ {
+ return Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32);
+ }
+ }
+
+ public static Operand GetVecA32(int regIndex)
+ {
+ return Register(regIndex, RegisterType.Vector, OperandType.V128);
+ }
+
+ public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ if (regIndex == RegisterAlias.Aarch32Pc)
+ {
+ if (!IsA32Return(context))
+ {
+ context.StoreToContext();
+ }
+
+ EmitBxWritePc(context, value);
+ }
+ else
+ {
+ if (value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+ Operand reg = Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32);
+
+ context.Copy(reg, value);
+ }
+ }
+
+ public static int GetRegisterAlias(Aarch32Mode mode, int regIndex)
+ {
+ // Only registers >= 8 are banked,
+ // with registers in the range [8, 12] being
+ // banked for the FIQ mode, and registers
+ // 13 and 14 being banked for all modes.
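+ // For example, r13 in IRQ mode resolves to SpIrq, while r13 in User or
+ // System mode resolves to SpUsr; r0-r7 are never banked.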
+ if ((uint)regIndex < 8)
+ {
+ return regIndex;
+ }
+
+ return GetBankedRegisterAlias(mode, regIndex);
+ }
+
+ public static int GetBankedRegisterAlias(Aarch32Mode mode, int regIndex)
+ {
+ switch (regIndex)
+ {
+ case 8: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R8Fiq
+ : RegisterAlias.R8Usr;
+
+ case 9: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R9Fiq
+ : RegisterAlias.R9Usr;
+
+ case 10: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R10Fiq
+ : RegisterAlias.R10Usr;
+
+ case 11: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R11Fiq
+ : RegisterAlias.R11Usr;
+
+ case 12: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R12Fiq
+ : RegisterAlias.R12Usr;
+
+ case 13:
+ switch (mode)
+ {
+ case Aarch32Mode.User:
+ case Aarch32Mode.System: return RegisterAlias.SpUsr;
+ case Aarch32Mode.Fiq: return RegisterAlias.SpFiq;
+ case Aarch32Mode.Irq: return RegisterAlias.SpIrq;
+ case Aarch32Mode.Supervisor: return RegisterAlias.SpSvc;
+ case Aarch32Mode.Abort: return RegisterAlias.SpAbt;
+ case Aarch32Mode.Hypervisor: return RegisterAlias.SpHyp;
+ case Aarch32Mode.Undefined: return RegisterAlias.SpUnd;
+
+ default: throw new ArgumentException($"Invalid mode: {mode}", nameof(mode));
+ }
+
+ case 14:
+ switch (mode)
+ {
+ case Aarch32Mode.User:
+ case Aarch32Mode.Hypervisor:
+ case Aarch32Mode.System: return RegisterAlias.LrUsr;
+ case Aarch32Mode.Fiq: return RegisterAlias.LrFiq;
+ case Aarch32Mode.Irq: return RegisterAlias.LrIrq;
+ case Aarch32Mode.Supervisor: return RegisterAlias.LrSvc;
+ case Aarch32Mode.Abort: return RegisterAlias.LrAbt;
+ case Aarch32Mode.Undefined: return RegisterAlias.LrUnd;
+
+ default: throw new ArgumentException($"Invalid mode: {mode}", nameof(mode));
+ }
+
+ default: throw new ArgumentOutOfRangeException(nameof(regIndex));
+ }
+ }
+
+ public static bool IsA32Return(ArmEmitterContext context)
+ {
+ switch (context.CurrOp)
+ {
+ case IOpCode32MemMult op:
+ return true; // Setting PC using LDM is nearly always a return.
+ case OpCode32AluRsImm op:
+ return op.Rm == RegisterAlias.Aarch32Lr;
+ case OpCode32AluRsReg op:
+ return op.Rm == RegisterAlias.Aarch32Lr;
+ case OpCode32AluReg op:
+ return op.Rm == RegisterAlias.Aarch32Lr;
+ case OpCode32Mem op:
+ return op.Rn == RegisterAlias.Aarch32Sp && op.WBack && !op.Index; // Setting PC to an address stored on the stack is nearly always a return.
+ }
+ return false;
+ }
+
+ public static void EmitBxWritePc(ArmEmitterContext context, Operand pc, int sourceRegister = 0)
+ {
+ bool isReturn = sourceRegister == RegisterAlias.Aarch32Lr || IsA32Return(context);
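+
+ // Bit 0 of the branch target selects the instruction set (interworking):
+ // 1 = Thumb, 0 = ARM. Mirror it into the T flag, then force the
+ // corresponding 2- or 4-byte alignment on the target address.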
+ Operand mode = context.BitwiseAnd(pc, Const(1));
+
+ SetFlag(context, PState.TFlag, mode);
+
+ Operand addr = context.ConditionalSelect(mode, context.BitwiseAnd(pc, Const(~1)), context.BitwiseAnd(pc, Const(~3)));
+
+ InstEmitFlowHelper.EmitVirtualJump(context, addr, isReturn);
+ }
+
+ public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex == RegisterConsts.ZeroIndex)
+ {
+ OperandType type = context.CurrOp.GetOperandType();
+
+ return type == OperandType.I32 ? Const(0) : Const(0L);
+ }
+ else
+ {
+ return GetIntOrSP(context, regIndex);
+ }
+ }
+
+ public static void SetIntOrZR(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ if (regIndex == RegisterConsts.ZeroIndex)
+ {
+ return;
+ }
+
+ SetIntOrSP(context, regIndex, value);
+ }
+
+ public static Operand GetIntOrSP(ArmEmitterContext context, int regIndex)
+ {
+ Operand value = Register(regIndex, RegisterType.Integer, OperandType.I64);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ return value;
+ }
+
+ public static void SetIntOrSP(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ Operand reg = Register(regIndex, RegisterType.Integer, OperandType.I64);
+
+ if (value.Type == OperandType.I32)
+ {
+ value = context.ZeroExtend32(OperandType.I64, value);
+ }
+
+ context.Copy(reg, value);
+ }
+
+ public static Operand GetVec(int regIndex)
+ {
+ return Register(regIndex, RegisterType.Vector, OperandType.V128);
+ }
+
+ public static Operand GetFlag(PState stateFlag)
+ {
+ return Register((int)stateFlag, RegisterType.Flag, OperandType.I32);
+ }
+
+ public static Operand GetFpFlag(FPState stateFlag)
+ {
+ return Register((int)stateFlag, RegisterType.FpFlag, OperandType.I32);
+ }
+
+ public static void SetFlag(ArmEmitterContext context, PState stateFlag, Operand value)
+ {
+ context.Copy(GetFlag(stateFlag), value);
+
+ context.MarkFlagSet(stateFlag);
+ }
+
+ public static void SetFpFlag(ArmEmitterContext context, FPState stateFlag, Operand value)
+ {
+ context.Copy(GetFpFlag(stateFlag), value);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMemory.cs b/src/ARMeilleure/Instructions/InstEmitMemory.cs
new file mode 100644
index 00000000..7baed14c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemory.cs
@@ -0,0 +1,184 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Adr(ArmEmitterContext context)
+ {
+ OpCodeAdr op = (OpCodeAdr)context.CurrOp;
+
+ SetIntOrZR(context, op.Rd, Const(op.Address + (ulong)op.Immediate));
+ }
+
+ public static void Adrp(ArmEmitterContext context)
+ {
+ OpCodeAdr op = (OpCodeAdr)context.CurrOp;
+
+ ulong address = (op.Address & ~0xfffUL) + ((ulong)op.Immediate << 12);
+
+ SetIntOrZR(context, op.Rd, Const(address));
+ }
+
+ public static void Ldr(ArmEmitterContext context) => EmitLdr(context, signed: false);
+ public static void Ldrs(ArmEmitterContext context) => EmitLdr(context, signed: true);
+
+ private static void EmitLdr(ArmEmitterContext context, bool signed)
+ {
+ OpCodeMem op = (OpCodeMem)context.CurrOp;
+
+ Operand address = GetAddress(context);
+
+ if (signed && op.Extend64)
+ {
+ EmitLoadSx64(context, address, op.Rt, op.Size);
+ }
+ else if (signed)
+ {
+ EmitLoadSx32(context, address, op.Rt, op.Size);
+ }
+ else
+ {
+ EmitLoadZx(context, address, op.Rt, op.Size);
+ }
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ public static void Ldr_Literal(ArmEmitterContext context)
+ {
+ IOpCodeLit op = (IOpCodeLit)context.CurrOp;
+
+ if (op.Prefetch)
+ {
+ return;
+ }
+
+ if (op.Signed)
+ {
+ EmitLoadSx64(context, Const(op.Immediate), op.Rt, op.Size);
+ }
+ else
+ {
+ EmitLoadZx(context, Const(op.Immediate), op.Rt, op.Size);
+ }
+ }
+
+ public static void Ldp(ArmEmitterContext context)
+ {
+ OpCodeMemPair op = (OpCodeMemPair)context.CurrOp;
+
+ void EmitLoad(int rt, Operand ldAddr)
+ {
+ if (op.Extend64)
+ {
+ EmitLoadSx64(context, ldAddr, rt, op.Size);
+ }
+ else
+ {
+ EmitLoadZx(context, ldAddr, rt, op.Size);
+ }
+ }
+
+ Operand address = GetAddress(context);
+ Operand address2 = GetAddress(context, 1L << op.Size);
+
+ EmitLoad(op.Rt, address);
+ EmitLoad(op.Rt2, address2);
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ public static void Str(ArmEmitterContext context)
+ {
+ OpCodeMem op = (OpCodeMem)context.CurrOp;
+
+ Operand address = GetAddress(context);
+
+ EmitStore(context, address, op.Rt, op.Size);
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ public static void Stp(ArmEmitterContext context)
+ {
+ OpCodeMemPair op = (OpCodeMemPair)context.CurrOp;
+
+ Operand address = GetAddress(context);
+ Operand address2 = GetAddress(context, 1L << op.Size);
+
+ EmitStore(context, address, op.Rt, op.Size);
+ EmitStore(context, address2, op.Rt2, op.Size);
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ private static Operand GetAddress(ArmEmitterContext context, long addend = 0)
+ {
+ Operand address = default;
+
+ switch (context.CurrOp)
+ {
+ case OpCodeMemImm op:
+ {
+ address = context.Copy(GetIntOrSP(context, op.Rn));
+
+ // Pre-indexing.
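+ // e.g. `ldr x0, [x1, #16]` and `ldr x0, [x1, #16]!` apply the immediate now,
+ // while post-indexed `ldr x0, [x1], #16` applies it only at write back.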
+ if (!op.PostIdx)
+ {
+ address = context.Add(address, Const(op.Immediate + addend));
+ }
+ else if (addend != 0)
+ {
+ address = context.Add(address, Const(addend));
+ }
+
+ break;
+ }
+
+ case OpCodeMemReg op:
+ {
+ Operand n = GetIntOrSP(context, op.Rn);
+
+ Operand m = GetExtendedM(context, op.Rm, op.IntType);
+
+ if (op.Shift)
+ {
+ m = context.ShiftLeft(m, Const(op.Size));
+ }
+
+ address = context.Add(n, m);
+
+ if (addend != 0)
+ {
+ address = context.Add(address, Const(addend));
+ }
+
+ break;
+ }
+ }
+
+ return address;
+ }
+
+ private static void EmitWBackIfNeeded(ArmEmitterContext context, Operand address)
+ {
+ // Check whether the current OpCode has post-indexed write back; if so, write the updated address back to Rn.
+ if (context.CurrOp is OpCodeMemImm op && op.WBack)
+ {
+ if (op.PostIdx)
+ {
+ address = context.Add(address, Const(op.Immediate));
+ }
+
+ SetIntOrSP(context, op.Rn, address);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitMemory32.cs b/src/ARMeilleure/Instructions/InstEmitMemory32.cs
new file mode 100644
index 00000000..17ec97aa
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemory32.cs
@@ -0,0 +1,265 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ private const int ByteSizeLog2 = 0;
+ private const int HWordSizeLog2 = 1;
+ private const int WordSizeLog2 = 2;
+ private const int DWordSizeLog2 = 3;
+
+ [Flags]
+ enum AccessType
+ {
+ Store = 0,
+ Signed = 1,
+ Load = 2,
+ Ordered = 4,
+ Exclusive = 8,
+
+ LoadZx = Load,
+ LoadSx = Load | Signed,
+ }
+
+ public static void Ldm(ArmEmitterContext context)
+ {
+ IOpCode32MemMult op = (IOpCode32MemMult)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ bool writesToPc = (op.RegisterMask & (1 << RegisterAlias.Aarch32Pc)) != 0;
+
+ bool writeBack = op.PostOffset != 0 && (op.Rn != RegisterAlias.Aarch32Pc || !writesToPc);
+
+ if (writeBack)
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ int mask = op.RegisterMask;
+ int offset = 0;
+
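+ // Walk the register mask from the lowest register upward; e.g. `ldm r0!, {r4, r5}`
+ // has mask bits 4 and 5 set and loads two consecutive words starting at the base.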
+ for (int register = 0; mask != 0; mask >>= 1, register++)
+ {
+ if ((mask & 1) != 0)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+
+ EmitLoadZx(context, address, register, WordSizeLog2);
+
+ offset += 4;
+ }
+ }
+ }
+
+ public static void Ldr(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, WordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrb(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrd(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrh(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrsb(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadSx);
+ }
+
+ public static void Ldrsh(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadSx);
+ }
+
+ public static void Stm(ArmEmitterContext context)
+ {
+ IOpCode32MemMult op = (IOpCode32MemMult)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ int mask = op.RegisterMask;
+ int offset = 0;
+
+ for (int register = 0; mask != 0; mask >>= 1, register++)
+ {
+ if ((mask & 1) != 0)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+
+ EmitStore(context, address, register, WordSizeLog2);
+
+ // Note: If Rn is also specified in the register list,
+ // and Rn is the first register in the list, then the
+ // value written to memory is the unmodified value,
+ // from before the write back. If it is in the list but
+ // is not the first register, then the value written to
+ // memory varies between CPUs.
+ if (offset == 0 && op.PostOffset != 0)
+ {
+ // Emit write back after the first write.
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ offset += 4;
+ }
+ }
+ }
+
+ public static void Str(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, WordSizeLog2, AccessType.Store);
+ }
+
+ public static void Strb(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.Store);
+ }
+
+ public static void Strd(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, DWordSizeLog2, AccessType.Store);
+ }
+
+ public static void Strh(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.Store);
+ }
+
+ private static void EmitLoadOrStore(ArmEmitterContext context, int size, AccessType accType)
+ {
+ IOpCode32Mem op = (IOpCode32Mem)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32AlignedPC(context, op.Rn));
+ Operand m = GetMemM(context, setCarry: false);
+
+ Operand temp = default;
+
+ if (op.Index || op.WBack)
+ {
+ temp = op.Add
+ ? context.Add (n, m)
+ : context.Subtract(n, m);
+ }
+
+ if (op.WBack)
+ {
+ SetIntA32(context, op.Rn, temp);
+ }
+
+ Operand address;
+
+ if (op.Index)
+ {
+ address = temp;
+ }
+ else
+ {
+ address = n;
+ }
+
+ if ((accType & AccessType.Load) != 0)
+ {
+ void Load(int rt, int offs, int loadSize)
+ {
+ Operand addr = context.Add(address, Const(offs));
+
+ if ((accType & AccessType.Signed) != 0)
+ {
+ EmitLoadSx32(context, addr, rt, loadSize);
+ }
+ else
+ {
+ EmitLoadZx(context, addr, rt, loadSize);
+ }
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Load(op.Rt, 0, WordSizeLog2);
+ Load(op.Rt2, 4, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Load(op.Rt2, 0, WordSizeLog2);
+ Load(op.Rt, 4, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Load(op.Rt, 0, size);
+ }
+ }
+ else
+ {
+ void Store(int rt, int offs, int storeSize)
+ {
+ Operand addr = context.Add(address, Const(offs));
+
+ EmitStore(context, addr, rt, storeSize);
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Store(op.Rt, 0, WordSizeLog2);
+ Store(op.Rt2, 4, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Store(op.Rt2, 0, WordSizeLog2);
+ Store(op.Rt, 4, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Store(op.Rt, 0, size);
+ }
+ }
+ }
+
+ public static void Adr(ArmEmitterContext context)
+ {
+ IOpCode32Adr op = (IOpCode32Adr)context.CurrOp;
+ SetIntA32(context, op.Rd, Const(op.Immediate));
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/src/ARMeilleure/Instructions/InstEmitMemoryEx.cs
new file mode 100644
index 00000000..c7ed01e3
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemoryEx.cs
@@ -0,0 +1,178 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryExHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ [Flags]
+ private enum AccessType
+ {
+ None = 0,
+ Ordered = 1,
+ Exclusive = 2,
+ OrderedEx = Ordered | Exclusive
+ }
+
+ public static void Clrex(ArmEmitterContext context)
+ {
+ EmitClearExclusive(context);
+ }
+
+ public static void Csdb(ArmEmitterContext context)
+ {
+ // Execute as no-op.
+ }
+
+ public static void Dmb(ArmEmitterContext context) => EmitBarrier(context);
+ public static void Dsb(ArmEmitterContext context) => EmitBarrier(context);
+
+ public static void Ldar(ArmEmitterContext context) => EmitLdr(context, AccessType.Ordered);
+ public static void Ldaxr(ArmEmitterContext context) => EmitLdr(context, AccessType.OrderedEx);
+ public static void Ldxr(ArmEmitterContext context) => EmitLdr(context, AccessType.Exclusive);
+ public static void Ldxp(ArmEmitterContext context) => EmitLdp(context, AccessType.Exclusive);
+ public static void Ldaxp(ArmEmitterContext context) => EmitLdp(context, AccessType.OrderedEx);
+
+ private static void EmitLdr(ArmEmitterContext context, AccessType accType)
+ {
+ EmitLoadEx(context, accType, pair: false);
+ }
+
+ private static void EmitLdp(ArmEmitterContext context, AccessType accType)
+ {
+ EmitLoadEx(context, accType, pair: true);
+ }
+
+ private static void EmitLoadEx(ArmEmitterContext context, AccessType accType, bool pair)
+ {
+ OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;
+
+ bool ordered = (accType & AccessType.Ordered) != 0;
+ bool exclusive = (accType & AccessType.Exclusive) != 0;
+
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+
+ Operand address = context.Copy(GetIntOrSP(context, op.Rn));
+
+ if (pair)
+ {
+ // Exclusive loads should be atomic. For pairwise loads, we need to
+ // read all the data at once. For a 32-bit pairwise load, we do a
+ // simple 64-bit load; for a 128-bit load, we need to call a special
+ // method to read 128 bits atomically.
+ if (op.Size == 2)
+ {
+ Operand value = EmitLoadExclusive(context, address, exclusive, 3);
+
+ Operand valueLow = context.ConvertI64ToI32(value);
+
+ valueLow = context.ZeroExtend32(OperandType.I64, valueLow);
+
+ Operand valueHigh = context.ShiftRightUI(value, Const(32));
+
+ SetIntOrZR(context, op.Rt, valueLow);
+ SetIntOrZR(context, op.Rt2, valueHigh);
+ }
+ else if (op.Size == 3)
+ {
+ Operand value = EmitLoadExclusive(context, address, exclusive, 4);
+
+ Operand valueLow = context.VectorExtract(OperandType.I64, value, 0);
+ Operand valueHigh = context.VectorExtract(OperandType.I64, value, 1);
+
+ SetIntOrZR(context, op.Rt, valueLow);
+ SetIntOrZR(context, op.Rt2, valueHigh);
+ }
+ else
+ {
+ throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes.");
+ }
+ }
+ else
+ {
+ // 8, 16, 32 or 64-bits (non-pairwise) load.
+ Operand value = EmitLoadExclusive(context, address, exclusive, op.Size);
+
+ SetIntOrZR(context, op.Rt, value);
+ }
+ }
+
+ public static void Prfm(ArmEmitterContext context)
+ {
+ // Memory Prefetch, execute as no-op.
+ }
+
+ public static void Stlr(ArmEmitterContext context) => EmitStr(context, AccessType.Ordered);
+ public static void Stlxr(ArmEmitterContext context) => EmitStr(context, AccessType.OrderedEx);
+ public static void Stxr(ArmEmitterContext context) => EmitStr(context, AccessType.Exclusive);
+ public static void Stxp(ArmEmitterContext context) => EmitStp(context, AccessType.Exclusive);
+ public static void Stlxp(ArmEmitterContext context) => EmitStp(context, AccessType.OrderedEx);
+
+ private static void EmitStr(ArmEmitterContext context, AccessType accType)
+ {
+ EmitStoreEx(context, accType, pair: false);
+ }
+
+ private static void EmitStp(ArmEmitterContext context, AccessType accType)
+ {
+ EmitStoreEx(context, accType, pair: true);
+ }
+
+ private static void EmitStoreEx(ArmEmitterContext context, AccessType accType, bool pair)
+ {
+ OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;
+
+ bool ordered = (accType & AccessType.Ordered) != 0;
+ bool exclusive = (accType & AccessType.Exclusive) != 0;
+
+ Operand address = context.Copy(GetIntOrSP(context, op.Rn));
+
+ Operand t = GetIntOrZR(context, op.Rt);
+
+ if (pair)
+ {
+ Debug.Assert(op.Size == 2 || op.Size == 3, "Invalid size for pairwise store.");
+
+ Operand t2 = GetIntOrZR(context, op.Rt2);
+
+ Operand value;
+
+ if (op.Size == 2)
+ {
+ value = context.BitwiseOr(t, context.ShiftLeft(t2, Const(32)));
+ }
+ else /* if (op.Size == 3) */
+ {
+ value = context.VectorInsert(context.VectorZero(), t, 0);
+ value = context.VectorInsert(value, t2, 1);
+ }
+
+ EmitStoreExclusive(context, address, value, exclusive, op.Size + 1, op.Rs, a32: false);
+ }
+ else
+ {
+ EmitStoreExclusive(context, address, t, exclusive, op.Size, op.Rs, a32: false);
+ }
+
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+ }
+
+ private static void EmitBarrier(ArmEmitterContext context)
+ {
+ context.MemoryBarrier();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs b/src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs
new file mode 100644
index 00000000..c0b6fc39
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs
@@ -0,0 +1,237 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryExHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Clrex(ArmEmitterContext context)
+ {
+ EmitClearExclusive(context);
+ }
+
+ public static void Csdb(ArmEmitterContext context)
+ {
+ // Execute as no-op.
+ }
+
+ public static void Dmb(ArmEmitterContext context) => EmitBarrier(context);
+
+ public static void Dsb(ArmEmitterContext context) => EmitBarrier(context);
+
+ public static void Ldrex(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive);
+ }
+
+ public static void Ldrexb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive);
+ }
+
+ public static void Ldrexd(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive);
+ }
+
+ public static void Ldrexh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive);
+ }
+
+ public static void Lda(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Ordered);
+ }
+
+ public static void Ldab(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Ordered);
+ }
+
+ public static void Ldaex(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Ldaexb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Ldaexd(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Ldaexh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Ldah(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Ordered);
+ }
+
+ // Stores.
+
+ public static void Strex(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive);
+ }
+
+ public static void Strexb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive);
+ }
+
+ public static void Strexd(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive);
+ }
+
+ public static void Strexh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive);
+ }
+
+ public static void Stl(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Ordered);
+ }
+
+ public static void Stlb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Ordered);
+ }
+
+ public static void Stlex(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Stlexb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Stlexd(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Stlexh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Stlh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Ordered);
+ }
+
+ private static void EmitExLoadOrStore(ArmEmitterContext context, int size, AccessType accType)
+ {
+ IOpCode32MemEx op = (IOpCode32MemEx)context.CurrOp;
+
+ Operand address = context.Copy(GetIntA32(context, op.Rn));
+
+ bool exclusive = (accType & AccessType.Exclusive) != 0;
+ bool ordered = (accType & AccessType.Ordered) != 0;
+
+ if ((accType & AccessType.Load) != 0)
+ {
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ // Keep loads atomic: read the whole region with one call, then decompose it into
+ // parts for the registers.
+
+ Operand value = EmitLoadExclusive(context, address, exclusive, size);
+
+ Operand valueLow = context.ConvertI64ToI32(value);
+
+ valueLow = context.ZeroExtend32(OperandType.I64, valueLow);
+
+ Operand valueHigh = context.ShiftRightUI(value, Const(32));
+
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ SetIntA32(context, op.Rt, valueLow);
+ SetIntA32(context, op.Rt2, valueHigh);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ SetIntA32(context, op.Rt2, valueLow);
+ SetIntA32(context, op.Rt, valueHigh);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ SetIntA32(context, op.Rt, EmitLoadExclusive(context, address, exclusive, size));
+ }
+ }
+ else
+ {
+ if (size == DWordSizeLog2)
+ {
+ // Split the result into 2 words (based on endianness)
+
+ Operand lo = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt));
+ Operand hi = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt2));
+
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Operand leResult = context.BitwiseOr(lo, context.ShiftLeft(hi, Const(32)));
+ EmitStoreExclusive(context, address, leResult, exclusive, size, op.Rd, a32: true);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Operand beResult = context.BitwiseOr(hi, context.ShiftLeft(lo, Const(32)));
+ EmitStoreExclusive(context, address, beResult, exclusive, size, op.Rd, a32: true);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Operand value = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt));
+ EmitStoreExclusive(context, address, value, exclusive, size, op.Rd, a32: true);
+ }
+
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+ }
+ }
+
+ private static void EmitBarrier(ArmEmitterContext context)
+ {
+ // Note: This barrier is most likely unnecessary, and probably makes
+ // no difference, since we already need to do a ton of work
+ // (software MMU emulation) to read or write anything anyway.
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs b/src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs
new file mode 100644
index 00000000..9a69442a
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs
@@ -0,0 +1,174 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitMemoryExHelper
+ {
+ private const int ErgSizeLog2 = 4;
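+
+ // The emulated exclusives reservation granule is 4 << ErgSizeLog2 = 64 bytes; see
+ // GetExclusiveAddressMask below, which aligns monitored addresses to that granule.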
+
+ public static Operand EmitLoadExclusive(ArmEmitterContext context, Operand address, bool exclusive, int size)
+ {
+ if (exclusive)
+ {
+ Operand value;
+
+ if (size == 4)
+ {
+ // Only 128-bit CAS is guaranteed to have an atomic load.
+ Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, default, write: false, 4);
+
+ Operand zero = context.VectorZero();
+
+ value = context.CompareAndSwap(physAddr, zero, zero);
+ }
+ else
+ {
+ value = InstEmitMemoryHelper.EmitReadIntAligned(context, address, size);
+ }
+
+ Operand arg0 = context.LoadArgument(OperandType.I64, 0);
+
+ Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset()));
+ Operand exValuePtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveValueOffset()));
+
+ context.Store(exAddrPtr, context.BitwiseAnd(address, Const(address.Type, GetExclusiveAddressMask())));
+
+ // Make sure the unused higher bits of the value are cleared.
+ if (size < 3)
+ {
+ context.Store(exValuePtr, Const(0UL));
+ }
+ if (size < 4)
+ {
+ context.Store(context.Add(exValuePtr, Const(exValuePtr.Type, 8L)), Const(0UL));
+ }
+
+ // Store the new exclusive value.
+ context.Store(exValuePtr, value);
+
+ return value;
+ }
+ else
+ {
+ return InstEmitMemoryHelper.EmitReadIntAligned(context, address, size);
+ }
+ }
+
+ public static void EmitStoreExclusive(
+ ArmEmitterContext context,
+ Operand address,
+ Operand value,
+ bool exclusive,
+ int size,
+ int rs,
+ bool a32)
+ {
+ if (size < 3)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ if (exclusive)
+ {
+ // We overwrite one of the registers (Rs), so
+ // keep a copy of the values to ensure we are working with the correct ones.
+ address = context.Copy(address);
+ value = context.Copy(value);
+
+ void SetRs(Operand value)
+ {
+ if (a32)
+ {
+ SetIntA32(context, rs, value);
+ }
+ else
+ {
+ SetIntOrZR(context, rs, value);
+ }
+ }
+
+ Operand arg0 = context.LoadArgument(OperandType.I64, 0);
+
+ Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset()));
+ Operand exAddr = context.Load(address.Type, exAddrPtr);
+
+ // STEP 1: Check if we have exclusive access to this memory region. If not, fail and skip store.
+ Operand maskedAddress = context.BitwiseAnd(address, Const(address.Type, GetExclusiveAddressMask()));
+
+ Operand exFailed = context.ICompareNotEqual(exAddr, maskedAddress);
+
+ Operand lblExit = Label();
+
+ SetRs(Const(1));
+
+ context.BranchIfTrue(lblExit, exFailed);
+
+ // STEP 2: We have exclusive access and the address is valid, attempt the store using CAS.
+ Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, default, write: true, size);
+
+ Operand exValuePtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveValueOffset()));
+ Operand exValue = size switch
+ {
+ 0 => context.Load8(exValuePtr),
+ 1 => context.Load16(exValuePtr),
+ 2 => context.Load(OperandType.I32, exValuePtr),
+ 3 => context.Load(OperandType.I64, exValuePtr),
+ _ => context.Load(OperandType.V128, exValuePtr)
+ };
+
+ Operand currValue = size switch
+ {
+ 0 => context.CompareAndSwap8(physAddr, exValue, value),
+ 1 => context.CompareAndSwap16(physAddr, exValue, value),
+ _ => context.CompareAndSwap(physAddr, exValue, value)
+ };
+
+ // STEP 3: Check if we succeeded by comparing expected and in-memory values.
+ Operand storeFailed;
+
+ if (size == 4)
+ {
+ Operand currValueLow = context.VectorExtract(OperandType.I64, currValue, 0);
+ Operand currValueHigh = context.VectorExtract(OperandType.I64, currValue, 1);
+
+ Operand exValueLow = context.VectorExtract(OperandType.I64, exValue, 0);
+ Operand exValueHigh = context.VectorExtract(OperandType.I64, exValue, 1);
+
+ storeFailed = context.BitwiseOr(
+ context.ICompareNotEqual(currValueLow, exValueLow),
+ context.ICompareNotEqual(currValueHigh, exValueHigh));
+ }
+ else
+ {
+ storeFailed = context.ICompareNotEqual(currValue, exValue);
+ }
+
+ SetRs(storeFailed);
+
+ context.MarkLabel(lblExit);
+ }
+ else
+ {
+ InstEmitMemoryHelper.EmitWriteIntAligned(context, address, value, size);
+ }
+ }
+
+ public static void EmitClearExclusive(ArmEmitterContext context)
+ {
+ Operand arg0 = context.LoadArgument(OperandType.I64, 0);
+
+ Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset()));
+
+ // We store ulong.MaxValue to force any exclusive address check to fail,
+ // since this value is not aligned to the ERG mask.
+ context.Store(exAddrPtr, Const(ulong.MaxValue));
+ }
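+
+ // Taken together, these helpers implement the guest's exclusive monitor; a typical
+ // A64 retry loop maps onto them roughly as follows (illustrative):
+ //
+ // loop: ldxr x1, [x0] -> EmitLoadExclusive(..., exclusive: true, ...)
+ // add x1, x1, #1
+ // stxr w2, x1, [x0] -> EmitStoreExclusive(..., rs: 2, ...); w2 = 0 on success, 1 on failure
+ // cbnz w2, loop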
+
+ private static long GetExclusiveAddressMask() => ~((4L << ErgSizeLog2) - 1);
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
new file mode 100644
index 00000000..f97e395c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
@@ -0,0 +1,648 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.Translation;
+using ARMeilleure.Translation.PTC;
+using System;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitMemoryHelper
+ {
+ private const int PageBits = 12;
+ private const int PageMask = (1 << PageBits) - 1;
+
+ private enum Extension
+ {
+ Zx,
+ Sx32,
+ Sx64
+ }
+
+ public static void EmitLoadZx(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ EmitLoad(context, address, Extension.Zx, rt, size);
+ }
+
+ public static void EmitLoadSx32(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ EmitLoad(context, address, Extension.Sx32, rt, size);
+ }
+
+ public static void EmitLoadSx64(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ EmitLoad(context, address, Extension.Sx64, rt, size);
+ }
+
+ private static void EmitLoad(ArmEmitterContext context, Operand address, Extension ext, int rt, int size)
+ {
+ bool isSimd = IsSimd(context);
+
+ if ((uint)size > (isSimd ? 4 : 3))
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ if (isSimd)
+ {
+ EmitReadVector(context, address, context.VectorZero(), rt, 0, size);
+ }
+ else
+ {
+ EmitReadInt(context, address, rt, size);
+ }
+
+ if (!isSimd && !(context.CurrOp is OpCode32 && rt == State.RegisterAlias.Aarch32Pc))
+ {
+ Operand value = GetInt(context, rt);
+
+ if (ext == Extension.Sx32 || ext == Extension.Sx64)
+ {
+ OperandType destType = ext == Extension.Sx64 ? OperandType.I64 : OperandType.I32;
+
+ switch (size)
+ {
+ case 0: value = context.SignExtend8 (destType, value); break;
+ case 1: value = context.SignExtend16(destType, value); break;
+ case 2: value = context.SignExtend32(destType, value); break;
+ }
+ }
+
+ SetInt(context, rt, value);
+ }
+ }
+
+ public static void EmitLoadSimd(
+ ArmEmitterContext context,
+ Operand address,
+ Operand vector,
+ int rt,
+ int elem,
+ int size)
+ {
+ EmitReadVector(context, address, vector, rt, elem, size);
+ }
+
+ public static void EmitStore(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ bool isSimd = IsSimd(context);
+
+ if ((uint)size > (isSimd ? 4 : 3))
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ if (isSimd)
+ {
+ EmitWriteVector(context, address, rt, 0, size);
+ }
+ else
+ {
+ EmitWriteInt(context, address, rt, size);
+ }
+ }
+
+ public static void EmitStoreSimd(
+ ArmEmitterContext context,
+ Operand address,
+ int rt,
+ int elem,
+ int size)
+ {
+ EmitWriteVector(context, address, rt, elem, size);
+ }
+
+ private static bool IsSimd(ArmEmitterContext context)
+ {
+ return context.CurrOp is IOpCodeSimd &&
+ !(context.CurrOp is OpCodeSimdMemMs ||
+ context.CurrOp is OpCodeSimdMemSs);
+ }
+
+ public static Operand EmitReadInt(ArmEmitterContext context, Operand address, int size)
+ {
+ Operand temp = context.AllocateLocal(size == 3 ? OperandType.I64 : OperandType.I32);
+
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
+
+ Operand value = default;
+
+ switch (size)
+ {
+ case 0: value = context.Load8 (physAddr); break;
+ case 1: value = context.Load16(physAddr); break;
+ case 2: value = context.Load (OperandType.I32, physAddr); break;
+ case 3: value = context.Load (OperandType.I64, physAddr); break;
+ }
+
+ context.Copy(temp, value);
+
+ if (!context.Memory.Type.IsHostMapped())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ context.Copy(temp, EmitReadIntFallback(context, address, size));
+
+ context.MarkLabel(lblEnd);
+ }
+
+ return temp;
+ }
+
+ private static void EmitReadInt(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
+
+ Operand value = default;
+
+ switch (size)
+ {
+ case 0: value = context.Load8 (physAddr); break;
+ case 1: value = context.Load16(physAddr); break;
+ case 2: value = context.Load (OperandType.I32, physAddr); break;
+ case 3: value = context.Load (OperandType.I64, physAddr); break;
+ }
+
+ SetInt(context, rt, value);
+
+ if (!context.Memory.Type.IsHostMapped())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ EmitReadIntFallback(context, address, rt, size);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ public static Operand EmitReadIntAligned(ArmEmitterContext context, Operand address, int size)
+ {
+ if ((uint)size > 4)
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ Operand physAddr = EmitPtPointerLoad(context, address, default, write: false, size);
+
+ return size switch
+ {
+ 0 => context.Load8(physAddr),
+ 1 => context.Load16(physAddr),
+ 2 => context.Load(OperandType.I32, physAddr),
+ 3 => context.Load(OperandType.I64, physAddr),
+ _ => context.Load(OperandType.V128, physAddr)
+ };
+ }
+
+ private static void EmitReadVector(
+ ArmEmitterContext context,
+ Operand address,
+ Operand vector,
+ int rt,
+ int elem,
+ int size)
+ {
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
+
+ Operand value = default;
+
+ switch (size)
+ {
+ case 0: value = context.VectorInsert8 (vector, context.Load8(physAddr), elem); break;
+ case 1: value = context.VectorInsert16(vector, context.Load16(physAddr), elem); break;
+ case 2: value = context.VectorInsert (vector, context.Load(OperandType.I32, physAddr), elem); break;
+ case 3: value = context.VectorInsert (vector, context.Load(OperandType.I64, physAddr), elem); break;
+ case 4: value = context.Load (OperandType.V128, physAddr); break;
+ }
+
+ context.Copy(GetVec(rt), value);
+
+ if (!context.Memory.Type.IsHostMapped())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ EmitReadVectorFallback(context, address, vector, rt, elem, size);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ private static Operand VectorCreate(ArmEmitterContext context, Operand value)
+ {
+ return context.VectorInsert(context.VectorZero(), value, 0);
+ }
+
+ private static void EmitWriteInt(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true, size);
+
+ Operand value = GetInt(context, rt);
+
+ if (size < 3 && value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ switch (size)
+ {
+ case 0: context.Store8 (physAddr, value); break;
+ case 1: context.Store16(physAddr, value); break;
+ case 2: context.Store (physAddr, value); break;
+ case 3: context.Store (physAddr, value); break;
+ }
+
+ if (!context.Memory.Type.IsHostMapped())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ EmitWriteIntFallback(context, address, rt, size);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ public static void EmitWriteIntAligned(ArmEmitterContext context, Operand address, Operand value, int size)
+ {
+ if ((uint)size > 4)
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ Operand physAddr = EmitPtPointerLoad(context, address, default, write: true, size);
+
+ if (size < 3 && value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ if (size == 0)
+ {
+ context.Store8(physAddr, value);
+ }
+ else if (size == 1)
+ {
+ context.Store16(physAddr, value);
+ }
+ else
+ {
+ context.Store(physAddr, value);
+ }
+ }
+
+ private static void EmitWriteVector(
+ ArmEmitterContext context,
+ Operand address,
+ int rt,
+ int elem,
+ int size)
+ {
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true, size);
+
+ Operand value = GetVec(rt);
+
+ switch (size)
+ {
+ case 0: context.Store8 (physAddr, context.VectorExtract8(value, elem)); break;
+ case 1: context.Store16(physAddr, context.VectorExtract16(value, elem)); break;
+ case 2: context.Store (physAddr, context.VectorExtract(OperandType.I32, value, elem)); break;
+ case 3: context.Store (physAddr, context.VectorExtract(OperandType.I64, value, elem)); break;
+ case 4: context.Store (physAddr, value); break;
+ }
+
+ if (!context.Memory.Type.IsHostMapped())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ EmitWriteVectorFallback(context, address, rt, elem, size);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ public static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblSlowPath, bool write, int size)
+ {
+ if (context.Memory.Type.IsHostMapped())
+ {
+ return EmitHostMappedPointer(context, address);
+ }
+
+ int ptLevelBits = context.Memory.AddressSpaceBits - PageBits;
+ int ptLevelSize = 1 << ptLevelBits;
+ int ptLevelMask = ptLevelSize - 1;
+
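+ // Rotating the address right by the access size moves the low alignment bits into
+ // the top bits of the page index, so the single range check below also rejects
+ // misaligned accesses (see the masking comment further down).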
+ Operand addrRotated = size != 0 ? context.RotateRight(address, Const(size)) : address;
+ Operand addrShifted = context.ShiftRightUI(addrRotated, Const(PageBits - size));
+
+ Operand pte = !context.HasPtc
+ ? Const(context.Memory.PageTablePointer.ToInt64())
+ : Const(context.Memory.PageTablePointer.ToInt64(), Ptc.PageTableSymbol);
+
+ Operand pteOffset = context.BitwiseAnd(addrShifted, Const(addrShifted.Type, ptLevelMask));
+
+ if (pteOffset.Type == OperandType.I32)
+ {
+ pteOffset = context.ZeroExtend32(OperandType.I64, pteOffset);
+ }
+
+ pte = context.Load(OperandType.I64, context.Add(pte, context.ShiftLeft(pteOffset, Const(3))));
+
+ if (addrShifted.Type == OperandType.I32)
+ {
+ addrShifted = context.ZeroExtend32(OperandType.I64, addrShifted);
+ }
+
+ // If the VA is out of range, or not aligned to the access size, force PTE to 0 by masking it.
+ pte = context.BitwiseAnd(pte, context.ShiftRightSI(context.Add(addrShifted, Const(-(long)ptLevelSize)), Const(63)));
+
+ if (lblSlowPath != default)
+ {
+ if (write)
+ {
+ context.BranchIf(lblSlowPath, pte, Const(0L), Comparison.LessOrEqual);
+ pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits. (they are still used by C# memory access)
+ }
+ else
+ {
+ pte = context.ShiftLeft(pte, Const(1));
+ context.BranchIf(lblSlowPath, pte, Const(0L), Comparison.LessOrEqual);
+ pte = context.ShiftRightUI(pte, Const(1));
+ }
+ }
+ else
+ {
+ // When no slow path label is provided for invalid addresses,
+ // we do the validation ourselves and throw if needed.
+
+ Operand lblNotWatched = Label();
+
+ // Is the page currently being tracked for read/write? If so, we need to call SignalMemoryTracking.
+ context.BranchIf(lblNotWatched, pte, Const(0L), Comparison.GreaterOrEqual, BasicBlockFrequency.Cold);
+
+ // Signal memory tracking. The size doesn't matter here, as the address is assumed to be size-aligned.
+ context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SignalMemoryTracking)), address, Const(1UL), Const(write ? 1 : 0));
+ context.MarkLabel(lblNotWatched);
+
+ pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits. (they are still used by C# memory access)
+
+ Operand lblNonNull = Label();
+
+ // Skip exception if the PTE address is non-null (not zero).
+ context.BranchIfTrue(lblNonNull, pte, BasicBlockFrequency.Cold);
+
+ // The call is not expected to return (it should throw).
+ context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
+ context.MarkLabel(lblNonNull);
+ }
+
+ Operand pageOffset = context.BitwiseAnd(address, Const(address.Type, PageMask));
+
+ if (pageOffset.Type == OperandType.I32)
+ {
+ pageOffset = context.ZeroExtend32(OperandType.I64, pageOffset);
+ }
+
+ return context.Add(pte, pageOffset);
+ }
+
+ public static Operand EmitHostMappedPointer(ArmEmitterContext context, Operand address)
+ {
+ if (address.Type == OperandType.I32)
+ {
+ address = context.ZeroExtend32(OperandType.I64, address);
+ }
+
+ if (context.Memory.Type == MemoryManagerType.HostMapped)
+ {
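+                // Mask the address down to the address space size, so out-of-range guest
+                // addresses wrap within the reserved region instead of escaping it.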
+ Operand mask = Const(ulong.MaxValue >> (64 - context.Memory.AddressSpaceBits));
+ address = context.BitwiseAnd(address, mask);
+ }
+
+ Operand baseAddr = !context.HasPtc
+ ? Const(context.Memory.PageTablePointer.ToInt64())
+ : Const(context.Memory.PageTablePointer.ToInt64(), Ptc.PageTableSymbol);
+
+ return context.Add(baseAddr, address);
+ }
+
+ private static void EmitReadIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ SetInt(context, rt, EmitReadIntFallback(context, address, size));
+ }
+
+ private static Operand EmitReadIntFallback(ArmEmitterContext context, Operand address, int size)
+ {
+ MethodInfo info = null;
+
+ switch (size)
+ {
+ case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte)); break;
+ case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16)); break;
+ case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32)); break;
+ case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64)); break;
+ }
+
+ return context.Call(info, address);
+ }
+
+ private static void EmitReadVectorFallback(
+ ArmEmitterContext context,
+ Operand address,
+ Operand vector,
+ int rt,
+ int elem,
+ int size)
+ {
+ MethodInfo info = null;
+
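+            // size is log2 of the access width in bytes; 4 selects a full 128-bit vector.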
+ switch (size)
+ {
+ case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte)); break;
+ case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16)); break;
+ case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32)); break;
+ case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64)); break;
+ case 4: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadVector128)); break;
+ }
+
+ Operand value = context.Call(info, address);
+
+ switch (size)
+ {
+ case 0: value = context.VectorInsert8 (vector, value, elem); break;
+ case 1: value = context.VectorInsert16(vector, value, elem); break;
+ case 2: value = context.VectorInsert (vector, value, elem); break;
+ case 3: value = context.VectorInsert (vector, value, elem); break;
+ }
+
+ context.Copy(GetVec(rt), value);
+ }
+
+ private static void EmitWriteIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ MethodInfo info = null;
+
+ switch (size)
+ {
+ case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte)); break;
+ case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16)); break;
+ case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32)); break;
+ case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64)); break;
+ }
+
+ Operand value = GetInt(context, rt);
+
+ if (size < 3 && value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ context.Call(info, address, value);
+ }
+
+ private static void EmitWriteVectorFallback(
+ ArmEmitterContext context,
+ Operand address,
+ int rt,
+ int elem,
+ int size)
+ {
+ MethodInfo info = null;
+
+ switch (size)
+ {
+ case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte)); break;
+ case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16)); break;
+ case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32)); break;
+ case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64)); break;
+ case 4: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteVector128)); break;
+ }
+
+ Operand value = default;
+
+ if (size < 4)
+ {
+ switch (size)
+ {
+ case 0: value = context.VectorExtract8 (GetVec(rt), elem); break;
+ case 1: value = context.VectorExtract16(GetVec(rt), elem); break;
+ case 2: value = context.VectorExtract (OperandType.I32, GetVec(rt), elem); break;
+ case 3: value = context.VectorExtract (OperandType.I64, GetVec(rt), elem); break;
+ }
+ }
+ else
+ {
+ value = GetVec(rt);
+ }
+
+ context.Call(info, address, value);
+ }
+
+ private static Operand GetInt(ArmEmitterContext context, int rt)
+ {
+ return context.CurrOp is OpCode32 ? GetIntA32(context, rt) : GetIntOrZR(context, rt);
+ }
+
+ private static void SetInt(ArmEmitterContext context, int rt, Operand value)
+ {
+ if (context.CurrOp is OpCode32)
+ {
+ SetIntA32(context, rt, value);
+ }
+ else
+ {
+ SetIntOrZR(context, rt, value);
+ }
+ }
+
+ // ARM32 helpers.
+ public static Operand GetMemM(ArmEmitterContext context, bool setCarry = true)
+ {
+ switch (context.CurrOp)
+ {
+ case IOpCode32MemRsImm op: return GetMShiftedByImmediate(context, op, setCarry);
+
+ case IOpCode32MemReg op: return GetIntA32(context, op.Rm);
+
+ case IOpCode32Mem op: return Const(op.Immediate);
+
+ case OpCode32SimdMemImm op: return Const(op.Immediate);
+
+ default: throw InvalidOpCodeType(context.CurrOp);
+ }
+ }
+
+ private static Exception InvalidOpCodeType(OpCode opCode)
+ {
+ return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\".");
+ }
+
+ public static Operand GetMShiftedByImmediate(ArmEmitterContext context, IOpCode32MemRsImm op, bool setCarry)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+
+ int shift = op.Immediate;
+
+ if (shift == 0)
+ {
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsr: shift = 32; break;
+ case ShiftType.Asr: shift = 32; break;
+ case ShiftType.Ror: shift = 1; break;
+ }
+ }
+
+ if (shift != 0)
+ {
+                setCarry = false;
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl: m = InstEmitAluHelper.GetLslC(context, m, setCarry, shift); break;
+ case ShiftType.Lsr: m = InstEmitAluHelper.GetLsrC(context, m, setCarry, shift); break;
+ case ShiftType.Asr: m = InstEmitAluHelper.GetAsrC(context, m, setCarry, shift); break;
+ case ShiftType.Ror:
+ if (op.Immediate != 0)
+ {
+ m = InstEmitAluHelper.GetRorC(context, m, setCarry, shift);
+ }
+ else
+ {
+ m = InstEmitAluHelper.GetRrxC(context, m, setCarry);
+ }
+ break;
+ }
+ }
+
+ return m;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMove.cs b/src/ARMeilleure/Instructions/InstEmitMove.cs
new file mode 100644
index 00000000..d551bf2d
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMove.cs
@@ -0,0 +1,41 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Movk(ArmEmitterContext context)
+ {
+ OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+ OperandType type = op.GetOperandType();
+
+ Operand res = GetIntOrZR(context, op.Rd);
+
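+            // Clear the 16-bit field at the target position, then insert the immediate.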
+ res = context.BitwiseAnd(res, Const(type, ~(0xffffL << op.Bit)));
+
+ res = context.BitwiseOr(res, Const(type, op.Immediate));
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Movn(ArmEmitterContext context)
+ {
+ OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+ SetIntOrZR(context, op.Rd, Const(op.GetOperandType(), ~op.Immediate));
+ }
+
+ public static void Movz(ArmEmitterContext context)
+ {
+ OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+ SetIntOrZR(context, op.Rd, Const(op.GetOperandType(), op.Immediate));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitMul.cs b/src/ARMeilleure/Instructions/InstEmitMul.cs
new file mode 100644
index 00000000..65d11b30
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMul.cs
@@ -0,0 +1,100 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Madd(ArmEmitterContext context) => EmitMul(context, isAdd: true);
+ public static void Msub(ArmEmitterContext context) => EmitMul(context, isAdd: false);
+
+ private static void EmitMul(ArmEmitterContext context, bool isAdd)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand a = GetIntOrZR(context, op.Ra);
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand res = context.Multiply(n, m);
+
+ res = isAdd ? context.Add(a, res) : context.Subtract(a, res);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Smaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedAdd);
+ public static void Smsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedSubtract);
+ public static void Umaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.Add);
+ public static void Umsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.Subtract);
+
+ [Flags]
+ private enum MullFlags
+ {
+ Subtract = 0,
+ Add = 1 << 0,
+ Signed = 1 << 1,
+
+ SignedAdd = Signed | Add,
+ SignedSubtract = Signed | Subtract
+ }
+
+ private static void EmitMull(ArmEmitterContext context, MullFlags flags)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
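+            // The long multiply forms operate on the low 32 bits of the sources,
+            // extended to 64 bits according to the Signed flag.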
+ Operand GetExtendedRegister32(int index)
+ {
+ Operand value = GetIntOrZR(context, index);
+
+ if ((flags & MullFlags.Signed) != 0)
+ {
+ return context.SignExtend32(value.Type, value);
+ }
+ else
+ {
+ return context.ZeroExtend32(value.Type, value);
+ }
+ }
+
+ Operand a = GetIntOrZR(context, op.Ra);
+
+ Operand n = GetExtendedRegister32(op.Rn);
+ Operand m = GetExtendedRegister32(op.Rm);
+
+ Operand res = context.Multiply(n, m);
+
+ res = (flags & MullFlags.Add) != 0 ? context.Add(a, res) : context.Subtract(a, res);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Smulh(ArmEmitterContext context)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand d = context.Multiply64HighSI(n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+
+ public static void Umulh(ArmEmitterContext context)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand d = context.Multiply64HighUI(n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitMul32.cs b/src/ARMeilleure/Instructions/InstEmitMul32.cs
new file mode 100644
index 00000000..0822f92c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMul32.cs
@@ -0,0 +1,379 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ [Flags]
+ private enum MullFlags
+ {
+ Subtract = 1,
+ Add = 1 << 1,
+ Signed = 1 << 2,
+
+ SignedAdd = Signed | Add,
+ SignedSubtract = Signed | Subtract
+ }
+
+ public static void Mla(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+ Operand a = GetIntA32(context, op.Ra);
+
+ Operand res = context.Add(a, context.Multiply(n, m));
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mls(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+ Operand a = GetIntA32(context, op.Ra);
+
+ Operand res = context.Subtract(a, context.Multiply(n, m));
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Smmla(ArmEmitterContext context)
+ {
+ EmitSmmul(context, MullFlags.SignedAdd);
+ }
+
+ public static void Smmls(ArmEmitterContext context)
+ {
+ EmitSmmul(context, MullFlags.SignedSubtract);
+ }
+
+ public static void Smmul(ArmEmitterContext context)
+ {
+ EmitSmmul(context, MullFlags.Signed);
+ }
+
+ private static void EmitSmmul(ArmEmitterContext context, MullFlags flags)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rn));
+ Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rm));
+
+ Operand res = context.Multiply(n, m);
+
+ if (flags.HasFlag(MullFlags.Add) && op.Ra != 0xf)
+ {
+ res = context.Add(context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Ra)), Const(32)), res);
+ }
+ else if (flags.HasFlag(MullFlags.Subtract))
+ {
+ res = context.Subtract(context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Ra)), Const(32)), res);
+ }
+
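+            // The rounding variant adds 0x80000000 before taking the high word, which
+            // rounds the discarded low 32 bits to nearest.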
+ if (op.R)
+ {
+ res = context.Add(res, Const(0x80000000L));
+ }
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightSI(res, Const(32)));
+
+ EmitGenericAluStoreA32(context, op.Rd, false, hi);
+ }
+
+ public static void Smla__(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+ Operand a = GetIntA32(context, op.Ra);
+
+ if (op.NHigh)
+ {
+ n = context.SignExtend16(OperandType.I64, context.ShiftRightUI(n, Const(16)));
+ }
+ else
+ {
+ n = context.SignExtend16(OperandType.I64, n);
+ }
+
+ if (op.MHigh)
+ {
+ m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16)));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(n, m);
+
+ Operand toAdd = context.SignExtend32(OperandType.I64, a);
+ res = context.Add(res, toAdd);
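+            // SMLA* does not saturate the result; it only sets Q when the 64-bit sum no
+            // longer fits in 32 bits.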
+ Operand q = context.ICompareNotEqual(res, context.SignExtend32(OperandType.I64, res));
+ res = context.ConvertI64ToI32(res);
+
+ UpdateQFlag(context, q);
+
+ EmitGenericAluStoreA32(context, op.Rd, false, res);
+ }
+
+ public static void Smlal(ArmEmitterContext context)
+ {
+ EmitMlal(context, true);
+ }
+
+ public static void Smlal__(ArmEmitterContext context)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ if (op.NHigh)
+ {
+ n = context.SignExtend16(OperandType.I64, context.ShiftRightUI(n, Const(16)));
+ }
+ else
+ {
+ n = context.SignExtend16(OperandType.I64, n);
+ }
+
+ if (op.MHigh)
+ {
+ m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16)));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(n, m);
+
+ Operand toAdd = context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)), Const(32));
+ toAdd = context.BitwiseOr(toAdd, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo)));
+ res = context.Add(res, toAdd);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ EmitGenericAluStoreA32(context, op.RdHi, false, hi);
+ EmitGenericAluStoreA32(context, op.RdLo, false, lo);
+ }
+
+ public static void Smlaw_(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+ Operand a = GetIntA32(context, op.Ra);
+
+ if (op.MHigh)
+ {
+ m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16)));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(context.SignExtend32(OperandType.I64, n), m);
+
+ Operand toAdd = context.ShiftLeft(context.SignExtend32(OperandType.I64, a), Const(16));
+ res = context.Add(res, toAdd);
+ res = context.ShiftRightSI(res, Const(16));
+ Operand q = context.ICompareNotEqual(res, context.SignExtend32(OperandType.I64, res));
+ res = context.ConvertI64ToI32(res);
+
+ UpdateQFlag(context, q);
+
+ EmitGenericAluStoreA32(context, op.Rd, false, res);
+ }
+
+ public static void Smul__(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ if (op.NHigh)
+ {
+ n = context.ShiftRightSI(n, Const(16));
+ }
+ else
+ {
+ n = context.SignExtend16(OperandType.I32, n);
+ }
+
+ if (op.MHigh)
+ {
+ m = context.ShiftRightSI(m, Const(16));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I32, m);
+ }
+
+ Operand res = context.Multiply(n, m);
+
+ EmitGenericAluStoreA32(context, op.Rd, false, res);
+ }
+
+ public static void Smull(ArmEmitterContext context)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rn));
+ Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rm));
+
+ Operand res = context.Multiply(n, m);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitGenericAluStoreA32(context, op.RdHi, ShouldSetFlags(context), hi);
+ EmitGenericAluStoreA32(context, op.RdLo, ShouldSetFlags(context), lo);
+ }
+
+ public static void Smulw_(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ if (op.MHigh)
+ {
+ m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16)));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(context.SignExtend32(OperandType.I64, n), m);
+
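+            // The 32x16 product is 48 bits wide; SMULW keeps bits [47:16].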
+ res = context.ShiftRightUI(res, Const(16));
+ res = context.ConvertI64ToI32(res);
+
+ EmitGenericAluStoreA32(context, op.Rd, false, res);
+ }
+
+ public static void Umaal(ArmEmitterContext context)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rn));
+ Operand m = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rm));
+ Operand dHi = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi));
+ Operand dLo = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo));
+
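+            // n * m + dHi + dLo cannot overflow 64 bits: (2^32 - 1)^2 + 2 * (2^32 - 1) = 2^64 - 1.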
+ Operand res = context.Multiply(n, m);
+ res = context.Add(res, dHi);
+ res = context.Add(res, dLo);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ EmitGenericAluStoreA32(context, op.RdHi, false, hi);
+ EmitGenericAluStoreA32(context, op.RdLo, false, lo);
+ }
+
+ public static void Umlal(ArmEmitterContext context)
+ {
+ EmitMlal(context, false);
+ }
+
+ public static void Umull(ArmEmitterContext context)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rn));
+ Operand m = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rm));
+
+ Operand res = context.Multiply(n, m);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitGenericAluStoreA32(context, op.RdHi, ShouldSetFlags(context), hi);
+ EmitGenericAluStoreA32(context, op.RdLo, ShouldSetFlags(context), lo);
+ }
+
+ private static void EmitMlal(ArmEmitterContext context, bool signed)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ if (signed)
+ {
+ n = context.SignExtend32(OperandType.I64, n);
+ m = context.SignExtend32(OperandType.I64, m);
+ }
+ else
+ {
+ n = context.ZeroExtend32(OperandType.I64, n);
+ m = context.ZeroExtend32(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(n, m);
+
+ Operand toAdd = context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)), Const(32));
+ toAdd = context.BitwiseOr(toAdd, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo)));
+ res = context.Add(res, toAdd);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitGenericAluStoreA32(context, op.RdHi, ShouldSetFlags(context), hi);
+ EmitGenericAluStoreA32(context, op.RdLo, ShouldSetFlags(context), lo);
+ }
+
+ private static void UpdateQFlag(ArmEmitterContext context, Operand q)
+ {
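+            // Q is sticky: it is only ever set here, never cleared.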
+ Operand lblSkipSetQ = Label();
+
+ context.BranchIfFalse(lblSkipSetQ, q);
+
+ SetFlag(context, PState.QFlag, Const(1));
+
+ context.MarkLabel(lblSkipSetQ);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
new file mode 100644
index 00000000..7e7f26b1
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -0,0 +1,5224 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+// https://www.agner.org/optimize/#vectorclass @ vectori128.h
+
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
+ public static void Abs_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AbsS);
+ }
+ else
+ {
+ EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ public static void Abs_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AbsV);
+ }
+ else
+ {
+ EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ public static void Add_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64AddS);
+ }
+ else
+ {
+ EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Add_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(addInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Addhn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64AddhnV);
+ }
+ else
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false);
+ }
+ }
+
+ public static void Addp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AddpS);
+ }
+ else
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+ Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size);
+
+ Operand res = context.Add(ne0, ne1);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size));
+ }
+ }
+
+ public static void Addp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PaddInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Addv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AddvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Cls_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClsV);
+ }
+ else
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ int eSize = 8 << op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize));
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Clz_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClzV);
+ }
+ else
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
+ Operand res = eSize switch {
+ 8 => Clz_V_I8 (context, GetVec(op.Rn)),
+ 16 => Clz_V_I16(context, GetVec(op.Rn)),
+ 32 => Clz_V_I32(context, GetVec(op.Rn)),
+ _ => default
+ };
+
+ if (res != default)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+
+ res = context.VectorZero();
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize));
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static Operand Clz_V_I8(ArmEmitterContext context, Operand arg)
+ {
+ if (!Optimizations.UseSsse3)
+ {
+ return default;
+ }
+
+ // CLZ nibble table.
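+            // Table bytes (LSB first) are clz(0..7) = { 4, 3, 2, 2, 1, 1, 1, 1 }; nibbles
+            // 8..15 index the zeroed upper half, which matches their CLZ of 0.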
+ Operand clzTable = X86GetScalar(context, 0x01_01_01_01_02_02_03_04);
+
+ Operand maskLow = X86GetAllElements(context, 0x0f_0f_0f_0f);
+ Operand c04 = X86GetAllElements(context, 0x04_04_04_04);
+
+ // CLZ of low 4 bits of elements in arg.
+ Operand loClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, arg);
+
+ // Get the high 4 bits of elements in arg.
+ Operand hiArg = context.AddIntrinsic(Intrinsic.X86Psrlw, arg, Const(4));
+ hiArg = context.AddIntrinsic(Intrinsic.X86Pand, hiArg, maskLow);
+
+ // CLZ of high 4 bits of elements in arg.
+ Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, hiArg);
+
+ // If high 4 bits are not all zero, we discard the CLZ of the low 4 bits.
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Pcmpeqb, hiClz, c04);
+ loClz = context.AddIntrinsic(Intrinsic.X86Pand, loClz, mask);
+
+ return context.AddIntrinsic(Intrinsic.X86Paddb, loClz, hiClz);
+ }
+
+ private static Operand Clz_V_I16(ArmEmitterContext context, Operand arg)
+ {
+ if (!Optimizations.UseSsse3)
+ {
+ return default;
+ }
+
+ Operand maskSwap = X86GetElements(context, 0x80_0f_80_0d_80_0b_80_09, 0x80_07_80_05_80_03_80_01);
+ Operand maskLow = X86GetAllElements(context, 0x00ff_00ff);
+ Operand c0008 = X86GetAllElements(context, 0x0008_0008);
+
+ // CLZ pair of high 8 and low 8 bits of elements in arg.
+ Operand hiloClz = Clz_V_I8(context, arg);
+ // Get CLZ of low 8 bits in each pair.
+ Operand loClz = context.AddIntrinsic(Intrinsic.X86Pand, hiloClz, maskLow);
+ // Get CLZ of high 8 bits in each pair.
+ Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, hiloClz, maskSwap);
+
+ // If high 8 bits are not all zero, we discard the CLZ of the low 8 bits.
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Pcmpeqw, hiClz, c0008);
+ loClz = context.AddIntrinsic(Intrinsic.X86Pand, loClz, mask);
+
+ return context.AddIntrinsic(Intrinsic.X86Paddw, loClz, hiClz);
+ }
+
+ private static Operand Clz_V_I32(ArmEmitterContext context, Operand arg)
+ {
+ // TODO: Use vplzcntd when AVX-512 is supported.
+ if (!Optimizations.UseSse2)
+ {
+ return default;
+ }
+
+ Operand AddVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Paddd, op0, op1);
+ Operand SubVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Psubd, op0, op1);
+ Operand ShiftRightVectorUI32(Operand op0, int imm8) => context.AddIntrinsic(Intrinsic.X86Psrld, op0, Const(imm8));
+ Operand OrVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Por, op0, op1);
+ Operand AndVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Pand, op0, op1);
+ Operand NotVector(Operand op0) => context.AddIntrinsic(Intrinsic.X86Pandn, op0, context.VectorOne());
+
+ Operand c55555555 = X86GetAllElements(context, 0x55555555);
+ Operand c33333333 = X86GetAllElements(context, 0x33333333);
+ Operand c0f0f0f0f = X86GetAllElements(context, 0x0f0f0f0f);
+ Operand c0000003f = X86GetAllElements(context, 0x0000003f);
+
+ Operand tmp0;
+ Operand tmp1;
+ Operand res;
+
+            // Set all bits below the highest set bit to 1.
+ res = OrVector(ShiftRightVectorUI32(arg, 1), arg);
+ res = OrVector(ShiftRightVectorUI32(res, 2), res);
+ res = OrVector(ShiftRightVectorUI32(res, 4), res);
+ res = OrVector(ShiftRightVectorUI32(res, 8), res);
+ res = OrVector(ShiftRightVectorUI32(res, 16), res);
+
+ // Make leading 0s into leading 1s.
+ res = NotVector(res);
+
+ // Count leading 1s, which is the population count.
+ tmp0 = ShiftRightVectorUI32(res, 1);
+ tmp0 = AndVector(tmp0, c55555555);
+ res = SubVectorI32(res, tmp0);
+
+ tmp0 = ShiftRightVectorUI32(res, 2);
+ tmp0 = AndVector(tmp0, c33333333);
+ tmp1 = AndVector(res, c33333333);
+ res = AddVectorI32(tmp0, tmp1);
+
+ tmp0 = ShiftRightVectorUI32(res, 4);
+ tmp0 = AddVectorI32(tmp0, res);
+ res = AndVector(tmp0, c0f0f0f0f);
+
+ tmp0 = ShiftRightVectorUI32(res, 8);
+ res = AddVectorI32(tmp0, res);
+
+ tmp0 = ShiftRightVectorUI32(res, 16);
+ res = AddVectorI32(tmp0, res);
+
+ res = AndVector(res, c0000003f);
+
+ return res;
+ }
+
+ public static void Cnt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64CntV);
+ }
+ else
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
+
+ Operand de;
+
+ if (Optimizations.UsePopCnt)
+ {
+ de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne);
+ }
+ else
+ {
+ de = EmitCountSetBits8(context, ne);
+ }
+
+ res = EmitVectorInsert(context, res, de, index, 0);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fabd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FabdS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = EmitFloatAbs(context, res, true, false);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = EmitFloatAbs(context, res, false, false);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2);
+
+ return EmitUnaryMathCall(context, nameof(Math.Abs), res);
+ });
+ }
+ }
+
+ public static void Fabd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FabdV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subps, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = EmitFloatAbs(context, res, true, true);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = EmitFloatAbs(context, res, false, true);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2);
+
+ return EmitUnaryMathCall(context, nameof(Math.Abs), res);
+ });
+ }
+ }
+
+ public static void Fabs_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FabsS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (op.Size == 0)
+ {
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, false);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, false);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Abs), op1);
+ });
+ }
+ }
+
+ public static void Fabs_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FabsV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, true);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, true);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Abs), op1);
+ });
+ }
+ }
+
+ public static void Fadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Add(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
+ }
+ }
+
+ public static void Fadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Add(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
+ }
+ }
+
+ public static void Faddp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FaddpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
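+                // FADDP (scalar) sums the two elements of the pair; a horizontal add with
+                // the same source in both operands leaves that sum in the low lane.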
+ if ((op.Size & 1) == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Haddps, GetVec(op.Rn), GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Haddpd, GetVec(op.Rn), GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
+ }
+ }
+
+ public static void Faddp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ Intrinsic addInst = (op.Size & 1) == 0 ? Intrinsic.X86Addps : Intrinsic.X86Addpd;
+
+ return context.AddIntrinsic(addInst, op1, op2);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
+ }
+ }
+
+ public static void Fdiv_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FdivS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Divide(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2);
+ });
+ }
+ }
+
+ public static void Fdiv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FdivV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Divide(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmadd_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand a = GetVec(op.Ra);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ if (op.Size == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, a, n, m);
+ }
+ else
+ {
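+                        // Without FMA the multiply and add round separately, so this
+                        // path is not truly fused like the ARM instruction.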
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addss, a, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addsd, a, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmax_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxnmS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnmp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxnmpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true, op1, op2);
+ });
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnmp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnmv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxnmvV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxvV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmin_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminnmS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnmp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminnmpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true, op1, op2);
+ });
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnmp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnmv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminnmvV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminvV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmla_Se(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaSe);
+ }
+ else if (Optimizations.UseFma)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
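+                    // Broadcast element [Index] of m to every lane before the multiply.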
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Fmla_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlaV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmla_Ve(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaVe);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmls_Se(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsSe);
+ }
+ else if (Optimizations.UseFma)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Fmls_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmls_Ve(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsVe);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmsub_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand a = GetVec(op.Ra);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
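+ // AArch64 FMSUB computes a - (n * m); x86 VFNMADD231 (dst = dst - src1*src2) does this
+ // directly with a as the accumulator, and the fallback subtracts the product from a.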
+ if (op.Size == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, a, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, a, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmul_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmul_Se(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulSe);
+ }
+ else
+ {
+ EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Fmul_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmul_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulVe);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpByElemF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmulx_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulxS);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmulx_Se(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulxSe);
+ }
+ else
+ {
+ EmitScalarBinaryOpByElemF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmulx_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulxV);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmulx_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulxVe);
+ }
+ else
+ {
+ EmitVectorBinaryOpByElemF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+ });
+ }
+ }
+
+ public static void Fneg_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FnegS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (op.Size == 0)
+ {
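+ // XOR with -0.0 flips only the sign bit, negating the value without touching NaN payloads.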
+ Operand mask = X86GetScalar(context, -0f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand mask = X86GetScalar(context, -0d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Fneg_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FnegV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand mask = X86GetAllElements(context, -0f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand mask = X86GetAllElements(context, -0d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Fnmadd_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand a = GetVec(op.Ra);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
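+ // AArch64 FNMADD computes -(a + n*m), i.e. -a - n*m; VFNMSUB231 (dst = -(src1*src2) - dst)
+ // maps to it directly, while the fallback sign-flips a before the multiply-subtract.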
+ if (op.Size == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231ss, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0f);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, aNeg, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231sd, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0d);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, aNeg, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fnmsub_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand a = GetVec(op.Ra);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
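+ // AArch64 FNMSUB computes (n * m) - a; VFMSUB231 (dst = src1*src2 - dst) maps to it
+ // directly, and the fallback adds the product to the sign-flipped a.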
+ if (op.Size == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmsub231ss, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0f);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addss, aNeg, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmsub231sd, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0d);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addsd, aNeg, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fnmul_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FnmulS);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
+ }
+ }
+
+ public static void Frecpe_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpeS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+ {
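+ // Rcpss is only an approximation (roughly 12 bits); EmitSse41Round32Exp8OpF trims the
+ // result so its precision matches the 8-bit AArch64 reciprocal estimate.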
+ Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate), op1);
+ });
+ }
+ }
+
+ public static void Frecpe_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrecpeV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+ {
+ Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate), op1);
+ });
+ }
+ }
+
+ public static void Frecps_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrecpsS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
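+ // FRECPS is one Newton-Raphson step for 1/x: 2 - n*m. The select helper substitutes the
+ // constant (2.0) for the 0 * infinity special cases, as the architecture requires.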
+ if (sizeF == 0)
+ {
+ Operand mask = X86GetScalar(context, 2f);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res);
+ }
+
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand mask = X86GetScalar(context, 2d);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res);
+ }
+
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused), op1, op2);
+ });
+ }
+ }
+
+ public static void Frecps_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrecpsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
+ Operand mask = X86GetAllElements(context, 2f);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res);
+ }
+
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand mask = X86GetAllElements(context, 2d);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res);
+ }
+
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused), op1, op2);
+ });
+ }
+ }
+
+ public static void Frecpx_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpxS);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1);
+ });
+ }
+ }
+
+ public static void Frinta_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintaS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearestAway);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
+ });
+ }
+ }
+
+ public static void Frinta_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintaV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearestAway);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
+ });
+ }
+ }
+
+ public static void Frinti_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintiS);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ public static void Frinti_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintiV);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ public static void Frintm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintmS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Floor), op1);
+ });
+ }
+ }
+
+ public static void Frintm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintmV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Floor), op1);
+ });
+ }
+ }
+
+ public static void Frintn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintnS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.ToEven, op1);
+ });
+ }
+ }
+
+ public static void Frintn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintnV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.ToEven, op1);
+ });
+ }
+ }
+
+ public static void Frintp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintpS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Ceiling), op1);
+ });
+ }
+ }
+
+ public static void Frintp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintpV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Ceiling), op1);
+ });
+ }
+ }
+
+ public static void Frintx_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintxS);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ public static void Frintx_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintxV);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ public static void Frintz_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintzS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Truncate), op1);
+ });
+ }
+ }
+
+ public static void Frintz_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintzV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Truncate), op1);
+ });
+ }
+ }
+
+ public static void Frsqrte_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrsqrteS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+ {
+ Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate), op1);
+ });
+ }
+ }
+
+ public static void Frsqrte_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrsqrteV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+ {
+ Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate), op1);
+ });
+ }
+ }
+
+ public static void Frsqrts_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrsqrtsS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
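+ // FRSQRTS is the reciprocal square root Newton-Raphson step: (3 - n*m) / 2. The select
+ // helper substitutes 1.5 for the 0 * infinity special cases.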
+ if (sizeF == 0)
+ {
+ Operand maskHalf = X86GetScalar(context, 0.5f);
+ Operand maskThree = X86GetScalar(context, 3f);
+ Operand maskOneHalf = X86GetScalar(context, 1.5f);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand maskHalf = X86GetScalar(context, 0.5d);
+ Operand maskThree = X86GetScalar(context, 3d);
+ Operand maskOneHalf = X86GetScalar(context, 1.5d);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused), op1, op2);
+ });
+ }
+ }
+
+ public static void Frsqrts_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrsqrtsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5f);
+ Operand maskThree = X86GetAllElements(context, 3f);
+ Operand maskOneHalf = X86GetAllElements(context, 1.5f);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5d);
+ Operand maskThree = X86GetAllElements(context, 3d);
+ Operand maskOneHalf = X86GetAllElements(context, 1.5d);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused), op1, op2);
+ });
+ }
+ }
+
+ public static void Fsqrt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FsqrtS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1);
+ });
+ }
+ }
+
+ public static void Fsqrt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FsqrtV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1);
+ });
+ }
+ }
+
+ public static void Fsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2);
+ });
+ }
+ }
+
+ public static void Fsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FsubV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2);
+ });
+ }
+ }
+
+ public static void Mla_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlaV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorMul_AddSub(context, AddSub.Add);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mla_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlaVe);
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mls_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorMul_AddSub(context, AddSub.Subtract);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mls_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlsVe);
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mul_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64MulV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorMul_AddSub(context, AddSub.None);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Mul_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64MulVe);
+ }
+ else
+ {
+ EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Neg_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64NegS);
+ }
+ else
+ {
+ EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Neg_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64NegV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(subInst, context.VectorZero(), GetVec(op.Rn));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Pmull_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseArm64Pmull)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV);
+ }
+ else if (Optimizations.UsePclmulqdq && op.Size == 3)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
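+ // Pclmulqdq's immediate selects the 64-bit halves to multiply: 0x00 takes the low
+ // quadwords (PMULL), 0x11 the high quadwords (PMULL2).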
+ int imm8 = op.RegisterSize == RegisterSize.Simd64 ? 0b0000_0000 : 0b0001_0001;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, n, m, Const(imm8));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ n = context.VectorZeroUpper64(n);
+ m = context.VectorZeroUpper64(m);
+ }
+ else /* if (op.RegisterSize == RegisterSize.Simd128) */
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Operand res = context.VectorZero();
+
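+ // No carry-less multiply available: do it bit by bit, expanding bit i of n into a
+ // full-lane mask and XOR-accumulating m << i.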
+ if (op.Size == 0)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Pmovzxbw, n);
+ m = context.AddIntrinsic(Intrinsic.X86Pmovzxbw, m);
+
+ for (int i = 0; i < 8; i++)
+ {
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Psllw, n, Const(15 - i));
+ mask = context.AddIntrinsic(Intrinsic.X86Psraw, mask, Const(15));
+
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Psllw, m, Const(i));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp);
+ }
+ }
+ else /* if (op.Size == 3) */
+ {
+ Operand zero = context.VectorZero();
+
+ for (int i = 0; i < 64; i++)
+ {
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Movlhps, n, n);
+ mask = context.AddIntrinsic(Intrinsic.X86Psllq, mask, Const(63 - i));
+ mask = context.AddIntrinsic(Intrinsic.X86Psrlq, mask, Const(63));
+ mask = context.AddIntrinsic(Intrinsic.X86Psubq, zero, mask);
+
+ Operand tmp = EmitSse2Sll_128(context, m, i);
+ tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ if (op.Size == 0)
+ {
+ res = context.VectorZero();
+
+ int part = op.RegisterSize == RegisterSize.Simd64 ? 0 : 8;
+
+ for (int index = 0; index < 8; index++)
+ {
+ Operand ne = context.VectorExtract8(n, part + index);
+ Operand me = context.VectorExtract8(m, part + index);
+
+ Operand de = EmitPolynomialMultiply(context, ne, me, 8);
+
+ res = EmitVectorInsert(context, res, de, index, 1);
+ }
+ }
+ else /* if (op.Size == 3) */
+ {
+ int part = op.RegisterSize == RegisterSize.Simd64 ? 0 : 1;
+
+ Operand ne = context.VectorExtract(OperandType.I64, n, part);
+ Operand me = context.VectorExtract(OperandType.I64, m, part);
+
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)), ne, me);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Raddhn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RaddhnV);
+ }
+ else
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true);
+ }
+ }
+
+ public static void Rsubhn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RsubhnV);
+ }
+ else
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true);
+ }
+ }
+
+ public static void Saba_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabaV);
+ }
+ else
+ {
+ EmitVectorTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+ }
+
+ public static void Sabal_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabalV);
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+ }
+
+ public static void Sabd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ EmitSse41VectorSabdOp(context, op, n, m, isLong: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Sabdl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdlV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
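+ // For the SABDL2 (upper-half) form, shift the high 64 bits down before sign-extending
+ // each element.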
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0
+ ? Intrinsic.X86Pmovsxbw
+ : Intrinsic.X86Pmovsxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ EmitSse41VectorSabdOp(context, op, n, m, isLong: true);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Sadalp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64SadalpV);
+ }
+ else
+ {
+ EmitAddLongPairwise(context, signed: true, accumulate: true);
+ }
+ }
+
+ public static void Saddl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddlV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Saddlp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlpV);
+ }
+ else
+ {
+ EmitAddLongPairwise(context, signed: true, accumulate: false);
+ }
+ }
+
+ public static void Saddlv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlvV);
+ }
+ else
+ {
+ EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Saddw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddwV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Shadd_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShaddV);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
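+ // Overflow-free halving add: (n & m) + ((n ^ m) >> 1) == (n + m) >> 1, using an
+ // arithmetic shift for the signed case.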
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psraw : Intrinsic.X86Psrad;
+
+ res2 = context.AddIntrinsic(shiftInst, res2, Const(1));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, res2);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) =>
+ {
+ return context.ShiftRightSI(context.Add(op1, op2), Const(1));
+ });
+ }
+ }
+
+ public static void Shsub_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShsubV);
+ }
+ else if (Optimizations.UseSse2 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
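+ // Bias both operands by the sign bit (0x80/0x8000) so they can be treated as unsigned;
+ // Pavg rounds up, and subtracting the average from the biased n yields the arithmetic
+ // (n - m) >> 1.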
+ Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ Operand nPlusMask = context.AddIntrinsic(addInst, n, mask);
+ Operand mPlusMask = context.AddIntrinsic(addInst, m, mask);
+
+ Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+ Operand res = context.AddIntrinsic(avgInst, nPlusMask, mPlusMask);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ res = context.AddIntrinsic(subInst, nPlusMask, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) =>
+ {
+ return context.ShiftRightSI(context.Subtract(op1, op2), Const(1));
+ });
+ }
+ }
+
+ public static void Smax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic maxInst = X86PmaxsInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Smaxp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PmaxsInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Smaxv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SmaxvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Smin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic minInst = X86PminsInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(minInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Sminp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PminsInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Sminv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SminvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Smlal_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlalV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(addInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smlal_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlalVe);
+ }
+ else
+ {
+ EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smlsl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlslV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0 ? Intrinsic.X86Pmovsxbw : Intrinsic.X86Pmovsxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(subInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smlsl_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlslVe);
+ }
+ else
+ {
+ EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smull_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmullV);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Smull_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64SmullVe);
+ }
+ else
+ {
+ EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Sqabs_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqabsS);
+ }
+ else
+ {
+ EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ public static void Sqabs_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqabsV);
+ }
+ else
+ {
+ EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ public static void Sqadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqaddS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+ }
+ }
+
+ public static void Sqadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqaddV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+ }
+ }
+
+ public static void Sqdmulh_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+ }
+ }
+
+ public static void Sqdmulh_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+ }
+ }
+
+ public static void Sqdmulh_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqdmulhVe);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+ }
+ }
+
+ public static void Sqneg_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqnegS);
+ }
+ else
+ {
+ EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Sqneg_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqnegV);
+ }
+ else
+ {
+ EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Sqrdmulh_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+ }
+ }
+
+ public static void Sqrdmulh_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+ }
+ }
+
+ public static void Sqrdmulh_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqrdmulhVe);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+ }
+ }
+
+ public static void Sqsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqsubS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+ }
+ }
+
+ public static void Sqsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqsubV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+ }
+ }
+
+ public static void Sqxtn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnS);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx);
+ }
+ }
+
+ public static void Sqxtn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnV);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx);
+ }
+ }
+
+ public static void Sqxtun_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunS);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx);
+ }
+ }
+
+ public static void Sqxtun_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunV);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx);
+ }
+ }
+
+ public static void Srhadd_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrhaddV);
+ }
+ else if (Optimizations.UseSse2 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
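+ // Pavg is an unsigned rounding-halving add; biasing the operands down by the sign bit
+ // and adding it back afterwards makes it operate on signed values.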
+ Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u));
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ Operand nMinusMask = context.AddIntrinsic(subInst, n, mask);
+ Operand mMinusMask = context.AddIntrinsic(subInst, m, mask);
+
+ Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+ Operand res = context.AddIntrinsic(avgInst, nMinusMask, mMinusMask);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, mask, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) =>
+ {
+ Operand res = context.Add(op1, op2);
+
+ res = context.Add(res, Const(1L));
+
+ return context.ShiftRightSI(res, Const(1));
+ });
+ }
+ }
+
+ public static void Ssubl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsublV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Ssubw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsubwV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Sub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SubS);
+ }
+ else
+ {
+ EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Sub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SubV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(subInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Subhn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SubhnV);
+ }
+ else
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false);
+ }
+ }
+
+ public static void Suqadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+ }
+ }
+
+ public static void Suqadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+ }
+ }
+
+ public static void Uaba_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabaV);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+ }
+
+ public static void Uabal_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabalV);
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+ }
+
+ public static void Uabd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ EmitSse41VectorUabdOp(context, op, n, m, isLong: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Uabdl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdlV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+                Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ EmitSse41VectorUabdOp(context, op, n, m, isLong: true);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Uadalp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64UadalpV);
+ }
+ else
+ {
+ EmitAddLongPairwise(context, signed: false, accumulate: true);
+ }
+ }
+
+ public static void Uaddl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddlV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Uaddlp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlpV);
+ }
+ else
+ {
+ EmitAddLongPairwise(context, signed: false, accumulate: false);
+ }
+ }
+
+ public static void Uaddlv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlvV);
+ }
+ else
+ {
+ EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Uaddw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddwV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Uhadd_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhaddV);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
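+                // (n & m) + ((n ^ m) >> 1) halves the sum without overflowing the element
+                // size. Bytes are excluded (Size > 0) because there is no byte shift (Psrlb).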
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psrlw : Intrinsic.X86Psrld;
+
+ res2 = context.AddIntrinsic(shiftInst, res2, Const(1));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, res2);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.ShiftRightUI(context.Add(op1, op2), Const(1));
+ });
+ }
+ }
+
+ public static void Uhsub_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhsubV);
+ }
+ else if (Optimizations.UseSse2 && op.Size < 2)
+ {
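+                // n - Pavg(n, m) = n - ((n + m + 1) >> 1) = (n - m) >> 1, the halving subtract.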
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+ Operand res = context.AddIntrinsic(avgInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ res = context.AddIntrinsic(subInst, n, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.ShiftRightUI(context.Subtract(op1, op2), Const(1));
+ });
+ }
+ }
+
+ public static void Umax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic maxInst = X86PmaxuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Umaxp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PmaxuInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Umaxv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UmaxvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Umin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic minInst = X86PminuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(minInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Uminp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PminuInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Uminv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UminvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Umlal_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlalV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
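+                // Zero extend both halves to the next element size, multiply (the products fit
+                // in the widened elements), then accumulate into Rd at the widened size.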
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(addInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umlal_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlalVe);
+ }
+ else
+ {
+ EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umlsl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlslV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+                Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(subInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umlsl_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlslVe);
+ }
+ else
+ {
+ EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umull_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmullV);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Umull_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64UmullVe);
+ }
+ else
+ {
+ EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Uqadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqaddS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+ }
+ }
+
+ public static void Uqadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqaddV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+ }
+ }
+
+ public static void Uqsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqsubS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+ }
+ }
+
+ public static void Uqsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqsubV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+ }
+ }
+
+ public static void Uqxtn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnS);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx);
+ }
+ }
+
+ public static void Uqxtn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnV);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx);
+ }
+ }
+
+ public static void Urhadd_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrhaddV);
+ }
+ else if (Optimizations.UseSse2 && op.Size < 2)
+ {
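+                // Pavgb/Pavgw already compute (n + m + 1) >> 1 on unsigned elements,
+                // which is exactly the unsigned rounding halving add.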
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+ Operand res = context.AddIntrinsic(avgInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ Operand res = context.Add(op1, op2);
+
+ res = context.Add(res, Const(1L));
+
+ return context.ShiftRightUI(res, Const(1));
+ });
+ }
+ }
+
+ public static void Usqadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+ }
+ }
+
+ public static void Usqadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+ }
+ }
+
+ public static void Usubl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsublV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Usubw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsubwV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ private static Operand EmitAbs(ArmEmitterContext context, Operand value)
+ {
+ Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0));
+
+ return context.ConditionalSelect(isPositive, value, context.Negate(value));
+ }
+
+ private static void EmitAddLongPairwise(ArmEmitterContext context, bool signed, bool accumulate)
+ {
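+            // Sums adjacent element pairs of Rn at twice the element size; when accumulating
+            // (S/UADALP), the pair sums are added to the existing elements of Rd.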
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
+ Operand ne1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);
+
+ Operand e = context.Add(ne0, ne1);
+
+ if (accumulate)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
+
+ e = context.Add(e, de);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static Operand EmitDoublingMultiplyHighHalf(
+ ArmEmitterContext context,
+ Operand n,
+ Operand m,
+ bool round)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
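+            // Computes (2 * n * m) >> eSize. Without rounding the doubling is folded into a
+            // shift by (eSize - 1); with rounding, (1 << (eSize - 1)) is added before the shift.
+            // For 32-bit elements the doubled product can wrap the I64 to int.MinValue, so that
+            // case is negated back into range for the caller's saturation to clamp.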
+ Operand res = context.Multiply(n, m);
+
+ if (!round)
+ {
+ res = context.ShiftRightSI(res, Const(eSize - 1));
+ }
+ else
+ {
+ long roundConst = 1L << (eSize - 1);
+
+ res = context.ShiftLeft(res, Const(1));
+
+ res = context.Add(res, Const(roundConst));
+
+ res = context.ShiftRightSI(res, Const(eSize));
+
+ Operand isIntMin = context.ICompareEqual(res, Const((long)int.MinValue));
+
+ res = context.ConditionalSelect(isIntMin, context.Negate(res), res);
+ }
+
+ return res;
+ }
+
+ private static void EmitHighNarrow(ArmEmitterContext context, Func2I emit, bool round)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int elems = 8 >> op.Size;
+ int eSize = 8 << op.Size;
+
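+            // The "2" variants (128-bit) write the narrowed results to the upper half of Rd,
+            // so the existing lower half is kept.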
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ long roundConst = 1L << (eSize - 1);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size + 1);
+
+ Operand de = emit(ne, me);
+
+ if (round)
+ {
+ de = context.Add(de, Const(roundConst));
+ }
+
+ de = context.ShiftRightUI(de, Const(eSize));
+
+ res = EmitVectorInsert(context, res, de, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+
+ private static Operand EmitMax64Op(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand cmp = signed
+ ? context.ICompareGreaterOrEqual (op1, op2)
+ : context.ICompareGreaterOrEqualUI(op1, op2);
+
+ return context.ConditionalSelect(cmp, op1, op2);
+ }
+
+ private static Operand EmitMin64Op(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand cmp = signed
+ ? context.ICompareLessOrEqual (op1, op2)
+ : context.ICompareLessOrEqualUI(op1, op2);
+
+ return context.ConditionalSelect(cmp, op1, op2);
+ }
+
+ private static void EmitSse41ScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
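+            // Roundss/Roundsd encode the rounding mode in an immediate, but have no
+            // "to nearest, ties away" mode, so that case is emulated separately.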
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss;
+
+ res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: true);
+ }
+
+ if ((op.Size & 1) != 0)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+ else
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitSse41VectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
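+            // As with the scalar form, Roundps/Roundpd cannot round to nearest with ties away,
+            // so that mode is emulated.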
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundpd : Intrinsic.X86Roundps;
+
+ res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: false);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static Operand EmitSse41Round32Exp8OpF(ArmEmitterContext context, Operand value, bool scalar)
+ {
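+            // Rounds the 23-bit fraction to its top 8 bits: the 0x4000 bias rounds bit 15 to
+            // nearest and the 0xFFFF8000 mask truncates. NaN/Inf inputs (exponent all ones)
+            // are passed through unchanged via the final blend.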
+ Operand roundMask;
+ Operand truncMask;
+ Operand expMask;
+
+ if (scalar)
+ {
+ roundMask = X86GetScalar(context, 0x4000);
+ truncMask = X86GetScalar(context, unchecked((int)0xFFFF8000));
+ expMask = X86GetScalar(context, 0x7F800000);
+ }
+ else
+ {
+ roundMask = X86GetAllElements(context, 0x4000);
+ truncMask = X86GetAllElements(context, unchecked((int)0xFFFF8000));
+ expMask = X86GetAllElements(context, 0x7F800000);
+ }
+
+ Operand oValue = value;
+ Operand masked = context.AddIntrinsic(Intrinsic.X86Pand, value, expMask);
+ Operand isNaNInf = context.AddIntrinsic(Intrinsic.X86Pcmpeqd, masked, expMask);
+
+ value = context.AddIntrinsic(Intrinsic.X86Paddd, value, roundMask);
+ value = context.AddIntrinsic(Intrinsic.X86Pand, value, truncMask);
+
+ return context.AddIntrinsic(Intrinsic.X86Blendvps, value, oValue, isNaNInf);
+ }
+
+ private static Operand EmitSse41RecipStepSelectOpF(
+ ArmEmitterContext context,
+ Operand n,
+ Operand m,
+ Operand res,
+ Operand mask,
+ bool scalar,
+ int sizeF)
+ {
+ Intrinsic cmpOp;
+ Intrinsic shlOp;
+ Intrinsic blendOp;
+ Operand zero = context.VectorZero();
+ Operand expMask;
+
+ if (sizeF == 0)
+ {
+ cmpOp = Intrinsic.X86Pcmpeqd;
+ shlOp = Intrinsic.X86Pslld;
+ blendOp = Intrinsic.X86Blendvps;
+ expMask = scalar ? X86GetScalar(context, 0x7F800000 << 1) : X86GetAllElements(context, 0x7F800000 << 1);
+ }
+ else /* if (sizeF == 1) */
+ {
+ cmpOp = Intrinsic.X86Pcmpeqq;
+ shlOp = Intrinsic.X86Psllq;
+ blendOp = Intrinsic.X86Blendvpd;
+ expMask = scalar ? X86GetScalar(context, 0x7FF0000000000000L << 1) : X86GetAllElements(context, 0x7FF0000000000000L << 1);
+ }
+
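+            // Shift out the sign bits, then flag lanes where one operand is zero and the other
+            // infinite (the 0 * Inf case); those lanes take the caller-provided default (mask).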
+ n = context.AddIntrinsic(shlOp, n, Const(1));
+ m = context.AddIntrinsic(shlOp, m, Const(1));
+
+ Operand nZero = context.AddIntrinsic(cmpOp, n, zero);
+ Operand mZero = context.AddIntrinsic(cmpOp, m, zero);
+ Operand nInf = context.AddIntrinsic(cmpOp, n, expMask);
+ Operand mInf = context.AddIntrinsic(cmpOp, m, expMask);
+
+ Operand nmZero = context.AddIntrinsic(Intrinsic.X86Por, nZero, mZero);
+ Operand nmInf = context.AddIntrinsic(Intrinsic.X86Por, nInf, mInf);
+ Operand nmZeroInf = context.AddIntrinsic(Intrinsic.X86Pand, nmZero, nmInf);
+
+ return context.AddIntrinsic(blendOp, res, mask, nmZeroInf);
+ }
+
+ public static void EmitSse2VectorIsNaNOpF(
+ ArmEmitterContext context,
+ Operand opF,
+ out Operand qNaNMask,
+ out Operand sNaNMask,
+ bool? isQNaN = null)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
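+            // mask1 flags NaN lanes via an unordered self-compare; mask2 flags lanes whose
+            // quiet bit (bit 22 single, bit 51 double) is set. Combining them separates
+            // quiet NaNs from signaling NaNs.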
+ if ((op.Size & 1) == 0)
+ {
+ const int QBit = 22;
+
+ Operand qMask = X86GetAllElements(context, 1 << QBit);
+
+ Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmpps, opF, opF, Const((int)CmpCondition.UnorderedQ));
+
+ Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
+ mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal));
+
+ qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andps, mask2, mask1) : default;
+ sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnps, mask2, mask1) : default;
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ const int QBit = 51;
+
+ Operand qMask = X86GetAllElements(context, 1L << QBit);
+
+ Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmppd, opF, opF, Const((int)CmpCondition.UnorderedQ));
+
+ Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
+ mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal));
+
+ qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andpd, mask2, mask1) : default;
+ sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnpd, mask2, mask1) : default;
+ }
+ }
+
+ public static Operand EmitSse41ProcessNaNsOpF(
+ ArmEmitterContext context,
+ Func2I emit,
+ bool scalar,
+ Operand n = default,
+ Operand m = default)
+ {
+ Operand nCopy = n == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)) : n;
+ Operand mCopy = m == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)) : m;
+
+ EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out Operand nSNaNMask);
+ EmitSse2VectorIsNaNOpF(context, mCopy, out _, out Operand mSNaNMask, isQNaN: false);
+
+ int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
+
+ if (sizeF == 0)
+ {
+ const int QBit = 22;
+
+ Operand qMask = scalar ? X86GetScalar(context, 1 << QBit) : X86GetAllElements(context, 1 << QBit);
+
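+                // Propagate n when it is signaling, or quiet while m is not signaling;
+                // otherwise propagate m. This matches the FPProcessNaNs priority, and the
+                // Q bit is set to quiet the result.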
+ Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
+ resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
+
+ Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, nCopy, resNaNMask);
+ resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
+
+ Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nCopy, mCopy, Const((int)CmpCondition.OrderedQ));
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Blendvps, resNaN, emit(nCopy, mCopy), resMask);
+
+ if (n != default || m != default)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return default;
+ }
+ else /* if (sizeF == 1) */
+ {
+ const int QBit = 51;
+
+ Operand qMask = scalar ? X86GetScalar(context, 1L << QBit) : X86GetAllElements(context, 1L << QBit);
+
+ Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
+ resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
+
+ Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, nCopy, resNaNMask);
+ resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
+
+ Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nCopy, mCopy, Const((int)CmpCondition.OrderedQ));
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Blendvpd, resNaN, emit(nCopy, mCopy), resMask);
+
+ if (n != default || m != default)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return default;
+ }
+ }
+
+ private static Operand EmitSse2VectorMaxMinOpF(ArmEmitterContext context, Operand n, Operand m, bool isMax)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
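+            // SSE max/min return the second source on a +/-0 tie, so the sign is handled
+            // separately: the magnitude comes from Maxps/Minps with the sign cleared, and the
+            // sign is the AND (max) or OR (min) of the operands' sign bits.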
+ if ((op.Size & 1) == 0)
+ {
+ Operand mask = X86GetAllElements(context, -0f);
+
+ Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxps : Intrinsic.X86Minps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
+
+ Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m);
+ resSign = context.AddIntrinsic(Intrinsic.X86Andps, mask, resSign);
+
+ return context.AddIntrinsic(Intrinsic.X86Por, res, resSign);
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ Operand mask = X86GetAllElements(context, -0d);
+
+ Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
+
+ Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m);
+ resSign = context.AddIntrinsic(Intrinsic.X86Andpd, mask, resSign);
+
+ return context.AddIntrinsic(Intrinsic.X86Por, res, resSign);
+ }
+ }
+
+ private static Operand EmitSse41MaxMinNumOpF(
+ ArmEmitterContext context,
+ bool isMaxNum,
+ bool scalar,
+ Operand n = default,
+ Operand m = default)
+ {
+ Operand nCopy = n == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)) : n;
+ Operand mCopy = m == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)) : m;
+
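+            // FMAXNM/FMINNM ignore a quiet NaN when the other operand is numeric: where exactly
+            // one operand is a quiet NaN, it is replaced with -Inf (max) or +Inf (min) so the
+            // numeric operand wins the following max/min.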
+ EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out _, isQNaN: true);
+ EmitSse2VectorIsNaNOpF(context, mCopy, out Operand mQNaNMask, out _, isQNaN: true);
+
+ int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand negInfMask = scalar
+ ? X86GetScalar (context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity)
+ : X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
+
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);
+
+ nCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, nCopy, negInfMask, nMask);
+ mCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, negInfMask, mMask);
+
+ Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
+ }, scalar: scalar, nCopy, mCopy);
+
+ if (n != default || m != default)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return default;
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand negInfMask = scalar
+ ? X86GetScalar (context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity)
+ : X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
+
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);
+
+ nCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, nCopy, negInfMask, nMask);
+ mCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, negInfMask, mMask);
+
+ Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
+ }, scalar: scalar, nCopy, mCopy);
+
+ if (n != default || m != default)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return default;
+ }
+ }
+
+ private enum AddSub
+ {
+ None,
+ Add,
+ Subtract
+ }
+
+ private static void EmitSse41VectorMul_AddSub(ArmEmitterContext context, AddSub addSub)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ if (op.Size == 0)
+ {
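+                // There is no SSE byte multiply. Multiply the high bytes of each word lane
+                // (shifted down, multiplied, shifted back up) and the low bytes with a plain
+                // Pmullw, then combine them with a 0x00FF00FF byte blend mask.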
+ Operand ns8 = context.AddIntrinsic(Intrinsic.X86Psrlw, n, Const(8));
+ Operand ms8 = context.AddIntrinsic(Intrinsic.X86Psrlw, m, Const(8));
+
+ res = context.AddIntrinsic(Intrinsic.X86Pmullw, ns8, ms8);
+
+ res = context.AddIntrinsic(Intrinsic.X86Psllw, res, Const(8));
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);
+
+ Operand mask = X86GetAllElements(context, 0x00FF00FF);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pblendvb, res, res2, mask);
+ }
+ else if (op.Size == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pmulld, n, m);
+ }
+
+ Operand d = GetVec(op.Rd);
+
+ if (addSub == AddSub.Add)
+ {
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, d, res);
+ }
+ else if (addSub == AddSub.Subtract)
+ {
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ res = context.AddIntrinsic(subInst, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+
+ private static void EmitSse41VectorSabdOp(
+ ArmEmitterContext context,
+ OpCodeSimdReg op,
+ Operand n,
+ Operand m,
+ bool isLong)
+ {
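+            // |n - m| signed: build an n > m mask with Pcmpgt, then merge (n - m) from the
+            // masked lanes with (m - n) from the remaining lanes.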
+ int size = isLong ? op.Size + 1 : op.Size;
+
+ Intrinsic cmpgtInst = X86PcmpgtInstruction[size];
+
+ Operand cmpMask = context.AddIntrinsic(cmpgtInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[size];
+
+ Operand res = context.AddIntrinsic(subInst, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res);
+
+ Operand res2 = context.AddIntrinsic(subInst, m, n);
+
+ res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+
+ if (!isLong && op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitSse41VectorUabdOp(
+ ArmEmitterContext context,
+ OpCodeSimdReg op,
+ Operand n,
+ Operand m,
+ bool isLong)
+ {
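+            // There is no unsigned SSE compare, so derive the n > m mask from the unsigned
+            // max: max(m, n) == m means m >= n, which is then inverted. The masked
+            // differences (n - m) and (m - n) are merged to give |n - m|.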
+ int size = isLong ? op.Size + 1 : op.Size;
+
+ Intrinsic maxInst = X86PmaxuInstruction[size];
+
+ Operand max = context.AddIntrinsic(maxInst, m, n);
+
+ Intrinsic cmpeqInst = X86PcmpeqInstruction[size];
+
+ Operand cmpMask = context.AddIntrinsic(cmpeqInst, max, m);
+
+ Operand onesMask = X86GetAllElements(context, -1L);
+
+ cmpMask = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, onesMask);
+
+ Intrinsic subInst = X86PsubInstruction[size];
+
+ Operand res = context.AddIntrinsic(subInst, n, m);
+ Operand res2 = context.AddIntrinsic(subInst, m, n);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res);
+ res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+
+ if (!isLong && op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static Operand EmitSse2Sll_128(ArmEmitterContext context, Operand op, int shift)
+ {
+ // The upper part of op is assumed to be zero.
+ Debug.Assert(shift >= 0 && shift < 64);
+
+ if (shift == 0)
+ {
+ return op;
+ }
+
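+            // Compose the 128-bit shift from 64-bit ones: move the low qword into the high
+            // qword (Pslldq 8) and shift it right by (64 - shift) to recover the carried bits,
+            // then OR with the low qword shifted left by the requested amount.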
+ Operand high = context.AddIntrinsic(Intrinsic.X86Pslldq, op, Const(8));
+ high = context.AddIntrinsic(Intrinsic.X86Psrlq, high, Const(64 - shift));
+
+ Operand low = context.AddIntrinsic(Intrinsic.X86Psllq, op, Const(shift));
+
+ return context.AddIntrinsic(Intrinsic.X86Por, high, low);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
new file mode 100644
index 00000000..a9994e41
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -0,0 +1,1703 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Vabd_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorBinaryOpI32(context, (op1, op2) => EmitAbs(context, context.Subtract(op1, op2)), !op.U);
+ }
+
+ public static void Vabdl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitAbs(context, context.Subtract(op1, op2)), !op.U);
+ }
+
+ public static void Vabs_S(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FabsS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ return EmitFloatAbs(context, m, (op.Size & 1) == 0, false);
+ });
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Abs), op1));
+ }
+ }
+
+ public static void Vabs_V(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FabsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return EmitFloatAbs(context, m, (op.Size & 1) == 0, true);
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Abs), op1));
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ private static Operand EmitAbs(ArmEmitterContext context, Operand value)
+ {
+ Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0));
+
+ return context.ConditionalSelect(isPositive, value, context.Negate(value));
+ }
+
+ public static void Vadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Add(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2));
+ }
+ }
+
+ public static void Vadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FaddV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => context.Add(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPAddFpscr), op1, op2));
+ }
+ }
+
+ public static void Vadd_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PaddInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Vaddl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
+ }
+
+ public static void Vaddw_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp;
+
+ EmitVectorBinaryWideOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
+ }
+
+ public static void Vcnt(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount();
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de;
+ Operand me = EmitVectorExtractZx32(context, op.Qm, op.Im + index, op.Size);
+
+ if (Optimizations.UsePopCnt)
+ {
+ de = context.AddIntrinsicInt(Intrinsic.X86Popcnt, me);
+ }
+ else
+ {
+ de = EmitCountSetBits8(context, me);
+ }
+
+ res = EmitVectorInsert(context, res, de, op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vdup(ArmEmitterContext context)
+ {
+ OpCode32SimdDupGP op = (OpCode32SimdDupGP)context.CurrOp;
+
+ Operand insert = GetIntA32(context, op.Rt);
+
+            // Zero extend into an I64, then replicate. This is faster than inserting each element individually.
+ insert = op.Size switch
+ {
+ 2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)),
+ 1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)),
+ 0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)),
+ _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\".")
+ };
+
+ InsertScalar(context, op.Vd, insert);
+ if (op.Q)
+ {
+ InsertScalar(context, op.Vd + 1, insert);
+ }
+ }
+
+ public static void Vdup_1(ArmEmitterContext context)
+ {
+ OpCode32SimdDupElem op = (OpCode32SimdDupElem)context.CurrOp;
+
+ Operand insert = EmitVectorExtractZx32(context, op.Vm >> 1, ((op.Vm & 1) << (3 - op.Size)) + op.Index, op.Size);
+
+            // Zero extend into an I64, then replicate. This is faster than inserting each element individually.
+ insert = op.Size switch
+ {
+ 2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)),
+ 1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)),
+ 0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)),
+ _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\".")
+ };
+
+ InsertScalar(context, op.Vd, insert);
+ if (op.Q)
+ {
+ InsertScalar(context, op.Vd | 1, insert);
+ }
+ }
+
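+        // Builds a 16-byte Pshufb mask (returned as high/low longs) that places 'length'
+        // consecutive source bytes starting at 'startByte' into positions [start, start + length);
+        // 0x80 entries zero all other lanes.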
+ private static (long, long) MaskHelperByteSequence(int start, int length, int startByte)
+ {
+ int end = start + length;
+ int b = startByte;
+ long result = 0;
+ long result2 = 0;
+ for (int i = 0; i < 8; i++)
+ {
+ result |= (long)((i >= end || i < start) ? 0x80 : b++) << (i * 8);
+ }
+ for (int i = 8; i < 16; i++)
+ {
+ result2 |= (long)((i >= end || i < start) ? 0x80 : b++) << ((i - 8) * 8);
+ }
+ return (result2, result);
+ }
+
+ public static void Vext(ArmEmitterContext context)
+ {
+ OpCode32SimdExt op = (OpCode32SimdExt)context.CurrOp;
+ int elems = op.GetBytesCount();
+ int byteOff = op.Immediate;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+                // Write d from low to high: the first (elems - imm) bytes come from n starting
+                // at byte <imm>, and the remaining <imm> bytes come from the bottom of m.
+                // Build Pshufb masks that rotate n down by <imm> and m up by (elems - imm),
+                // then OR the two parts together for the result.
+
+ (long nMaskHigh, long nMaskLow) = MaskHelperByteSequence(0, elems - byteOff, byteOff);
+ (long mMaskHigh, long mMaskLow) = MaskHelperByteSequence(elems - byteOff, byteOff, 0);
+ Operand nMask, mMask;
+ if (!op.Q)
+ {
+ // Do the same operation to the bytes in the top doubleword too, as our target could be in either.
+ nMaskHigh = nMaskLow + 0x0808080808080808L;
+ mMaskHigh = mMaskLow + 0x0808080808080808L;
+ }
+ nMask = X86GetElements(context, nMaskHigh, nMaskLow);
+ mMask = X86GetElements(context, mMaskHigh, mMaskLow);
+ Operand nPart = context.AddIntrinsic(Intrinsic.X86Pshufb, n, nMask);
+ Operand mPart = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mMask);
+
+ return context.AddIntrinsic(Intrinsic.X86Por, nPart, mPart);
+ });
+ }
+ else
+ {
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand extract;
+
+ if (byteOff >= elems)
+ {
+ extract = EmitVectorExtractZx32(context, op.Qm, op.Im + (byteOff - elems), op.Size);
+ }
+ else
+ {
+ extract = EmitVectorExtractZx32(context, op.Qn, op.In + byteOff, op.Size);
+ }
+ byteOff++;
+
+ res = EmitVectorInsert(context, res, extract, op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ }
+
+ public static void Vfma_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfma_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps);
+ }
+ else
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfms_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfms_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps);
+ }
+ else
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfnma_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfnms_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vhadd(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U)
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.ShiftRightUI(context.Add(op1, op2), Const(1)));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => context.ShiftRightSI(context.Add(op1, op2), Const(1)));
+ }
+ }
+
+ public static void Vmov_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpF32(context, 0, 0);
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => op1);
+ }
+ }
+
+ public static void Vmovn(ArmEmitterContext context)
+ {
+ EmitVectorUnaryNarrowOp32(context, (op1) => op1);
+ }
+
+ public static void Vneg_S(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FnegS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ if ((op.Size & 1) == 0)
+ {
+ Operand mask = X86GetScalar(context, -0f);
+ return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0d);
+ return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
+ }
+ });
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Vnmul_S(ArmEmitterContext context)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FnmulS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpSimd32(context, (n, m) =>
+ {
+ if ((op.Size & 1) == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ Operand mask = X86GetScalar(context, -0f);
+ return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res);
+ }
+ else
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ Operand mask = X86GetScalar(context, -0d);
+ return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
+ }
+ });
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
+ }
+ }
+
+ public static void Vnmla_S(ArmEmitterContext context)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(context.Negate(op1), context.Multiply(op2, op3));
+ });
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), context.Negate(op1), res);
+ });
+ }
+ }
+
+ public static void Vnmls_S(ArmEmitterContext context)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Add(context.Negate(op1), context.Multiply(op2, op3));
+ });
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), context.Negate(op1), res);
+ });
+ }
+ }
+
+ public static void Vneg_V(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FnegV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ if ((op.Size & 1) == 0)
+ {
+ Operand mask = X86GetAllElements(context, -0f);
+ return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
+ }
+ else
+ {
+ Operand mask = X86GetAllElements(context, -0d);
+ return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
+ }
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1));
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Vdiv_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FdivS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Divide(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmaxnm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmaxnmS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF32(context, true, true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2));
+ }
+ }
+
+ public static void Vmaxnm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxnmV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF32(context, true, false);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxNumFpscr), op1, op2));
+ }
+ }
+
+ public static void Vminnm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FminnmS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF32(context, false, true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2));
+ }
+ }
+
+ public static void Vminnm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminnmV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF32(context, false, false);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinNumFpscr), op1, op2));
+ }
+ }
+
+ public static void Vmax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxFpscr), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmax_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PmaxuInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreaterUI(op1, op2), op1, op2));
+ }
+ }
+ else
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PmaxsInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreater(op1, op2), op1, op2));
+ }
+ }
+ }
+
+ public static void Vmin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinFpscr), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmin_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PminuInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLessUI(op1, op2), op1, op2));
+ }
+ }
+ else
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PminsInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLess(op1, op2), op1, op2));
+ }
+ }
+ }
+
+ public static void Vmla_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, res);
+ });
+ }
+ }
+
+ public static void Vmla_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
+ }
+ else
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vmla_I(ArmEmitterContext context)
+ {
+ EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
+ }
+
+ public static void Vmla_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
+ }
+ else
+ {
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3));
+ }
+ }
+ else
+ {
+ EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false);
+ }
+ }
+
+ public static void Vmlal_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorTernaryLongOpI32(context, (d, n, m) => context.Add(d, context.Multiply(n, m)), !op.U);
+ }
+
+ public static void Vmls_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, res);
+ });
+ }
+ }
+
+ public static void Vmls_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
+ }
+ else
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vmls_I(ArmEmitterContext context)
+ {
+ EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
+ }
+
+ public static void Vmls_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
+ }
+ else
+ {
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3));
+ }
+ }
+ else
+ {
+ EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false);
+ }
+ }
+
+ public static void Vmlsl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorTernaryLongOpI32(context, (opD, op1, op2) => context.Subtract(opD, context.Multiply(op1, op2)), !op.U);
+ }
+
+ public static void Vmul_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmulS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmul_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmulV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulFpscr), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmul_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U) // This instruction is always signed, U indicates polynomial mode.
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Vmul_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulFpscr), op1, op2));
+ }
+ }
+ else
+ {
+ EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false);
+ }
+ }
+
+ public static void Vmull_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ EmitVectorByScalarLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U);
+ }
+
+ public static void Vmull_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ if (op.Polynomial)
+ {
+ if (op.Size == 0) // P8
+ {
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size), false);
+ }
+ else /* if (op.Size == 2) // P64 */
+ {
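+ // P64: 64x64 -> 128-bit carryless (polynomial) multiply, via a managed SoftFallback helper.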
+ Operand ne = context.VectorExtract(OperandType.I64, GetVec(op.Qn), op.Vn & 1);
+ Operand me = context.VectorExtract(OperandType.I64, GetVec(op.Qm), op.Vm & 1);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)), ne, me);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U);
+ }
+ }
+
+ public static void Vpadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FaddpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Addps);
+ }
+ else
+ {
+ EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPAddFpscr), op1, op2));
+ }
+ }
+
+ public static void Vpadd_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp32(context, X86PaddInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
+ }
+ }
+
+ public static void Vpaddl(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ EmitVectorPairwiseLongOpI32(context, (op1, op2) => context.Add(op1, op2), (op.Opc & 1) == 0);
+ }
+
+ public static void Vpmax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FmaxpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps);
+ }
+ else
+ {
+ EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxFpscr), op1, op2));
+ }
+ }
+
+ public static void Vpmax_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp32(context, op.U ? X86PmaxuInstruction : X86PmaxsInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpI32(context, (op1, op2) =>
+ {
+ Operand greater = op.U ? context.ICompareGreaterUI(op1, op2) : context.ICompareGreater(op1, op2);
+ return context.ConditionalSelect(greater, op1, op2);
+ }, !op.U);
+ }
+ }
+
+ public static void Vpmin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FminpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps);
+ }
+ else
+ {
+ EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinFpscr), op1, op2));
+ }
+ }
+
+ public static void Vpmin_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp32(context, op.U ? X86PminuInstruction : X86PminsInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpI32(context, (op1, op2) =>
+ {
+ Operand lesser = op.U ? context.ICompareLessUI(op1, op2) : context.ICompareLess(op1, op2);
+ return context.ConditionalSelect(lesser, op1, op2);
+ }, !op.U);
+ }
+ }
+
+ public static void Vqadd(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitSaturatingAddSubBinaryOp(context, add: true, !op.U);
+ }
+
+ public static void Vqdmulh(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ int eSize = 8 << op.Size;
+
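+ // Saturating doubling multiply high: sat((2 * n * m) >> eSize), computed as (n * m) >> (eSize - 1).
+ // Size 2 widens to 64 bits first so the 32x32-bit product cannot overflow.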
+ EmitVectorBinaryOpI32(context, (op1, op2) =>
+ {
+ if (op.Size == 2)
+ {
+ op1 = context.SignExtend32(OperandType.I64, op1);
+ op2 = context.SignExtend32(OperandType.I64, op2);
+ }
+
+ Operand res = context.Multiply(op1, op2);
+ res = context.ShiftRightSI(res, Const(eSize - 1));
+ res = EmitSatQ(context, res, eSize, signedSrc: true, signedDst: true);
+
+ if (op.Size == 2)
+ {
+ res = context.ConvertI64ToI32(res);
+ }
+
+ return res;
+ }, signed: true);
+ }
+
+ public static void Vqmovn(ArmEmitterContext context)
+ {
+ OpCode32SimdMovn op = (OpCode32SimdMovn)context.CurrOp;
+
+ bool signed = !op.Q;
+
+ EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signed, signed), signed);
+ }
+
+ public static void Vqmovun(ArmEmitterContext context)
+ {
+ OpCode32SimdMovn op = (OpCode32SimdMovn)context.CurrOp;
+
+ EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signedSrc: true, signedDst: false), signed: true);
+ }
+
+ public static void Vqsub(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitSaturatingAddSubBinaryOp(context, add: false, !op.U);
+ }
+
+ public static void Vrev(ArmEmitterContext context)
+ {
+ OpCode32SimdRev op = (OpCode32SimdRev)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
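+ // VREV64/VREV32/VREV16 reverse the sub-elements inside each 64/32/16-bit region; each case
+ // below does it with a single PSHUFB byte shuffle (or SHUFPS for the word-swap case).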
+ EmitVectorUnaryOpSimd32(context, (op1) =>
+ {
+ Operand mask;
+ switch (op.Size)
+ {
+ case 3:
+ // Rev64
+ switch (op.Opc)
+ {
+ case 0:
+ mask = X86GetElements(context, 0x08090a0b0c0d0e0fL, 0x0001020304050607L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ case 1:
+ mask = X86GetElements(context, 0x09080b0a0d0c0f0eL, 0x0100030205040706L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ case 2:
+ return context.AddIntrinsic(Intrinsic.X86Shufps, op1, op1, Const(1 | (0 << 2) | (3 << 4) | (2 << 6)));
+ }
+ break;
+ case 2:
+ // Rev32
+ switch (op.Opc)
+ {
+ case 0:
+ mask = X86GetElements(context, 0x0c0d0e0f_08090a0bL, 0x04050607_00010203L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ case 1:
+ mask = X86GetElements(context, 0x0d0c0f0e_09080b0aL, 0x05040706_01000302L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ }
+ break;
+ case 1:
+ // Rev16
+ mask = X86GetElements(context, 0x0e0f_0c0d_0a0b_0809L, 0x0607_0405_0203_0001L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ }
+
+ throw new InvalidOperationException("Invalid VREV Opcode + Size combo."); // Should be unreachable.
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpZx32(context, (op1) =>
+ {
+ switch (op.Opc)
+ {
+ case 0:
+ switch (op.Size) // Swap bytes.
+ {
+ case 1:
+ return InstEmitAluHelper.EmitReverseBytes16_32Op(context, op1);
+ case 2:
+ case 3:
+ return context.ByteSwap(op1);
+ }
+ break;
+ case 1:
+ switch (op.Size)
+ {
+ case 2:
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff0000)), Const(16)),
+ context.ShiftLeft(context.BitwiseAnd(op1, Const(0x0000ffff)), Const(16)));
+ case 3:
+ return context.BitwiseOr(
+ context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff000000000000ul)), Const(48)),
+ context.ShiftLeft(context.BitwiseAnd(op1, Const(0x000000000000fffful)), Const(48))),
+ context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0x0000ffff00000000ul)), Const(16)),
+ context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000ffff0000ul)), Const(16))));
+ }
+ break;
+ case 2:
+ // Swap upper and lower halves.
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffffffff00000000ul)), Const(32)),
+ context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000fffffffful)), Const(32)));
+ }
+
+ throw new InvalidOperationException("Invalid VREV Opcode + Size combo."); // Should be unreachable.
+ });
+ }
+ }
+
+ public static void Vrecpe(ArmEmitterContext context)
+ {
+ OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp;
+
+ if (op.F)
+ {
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrecpeV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
+ {
+ EmitVectorUnaryOpF32(context, Intrinsic.X86Rcpps, 0);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPRecipEstimateFpscr), op1);
+ });
+ }
+ }
+ else
+ {
+ throw new NotImplementedException("Integer Vrecpe not currently implemented.");
+ }
+ }
+
+ public static void Vrecps(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrecpsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ bool single = (op.Size & 1) == 0;
+
+ // VRECPS step: result = 2 - (n * m), one Newton-Raphson refinement for the reciprocal.
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ if (single)
+ {
+ Operand maskTwo = X86GetAllElements(context, 2f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+
+ return context.AddIntrinsic(Intrinsic.X86Subps, maskTwo, res);
+ }
+ else
+ {
+ Operand maskTwo = X86GetAllElements(context, 2d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+
+ return context.AddIntrinsic(Intrinsic.X86Subpd, maskTwo, res);
+ }
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStep), op1, op2);
+ });
+ }
+ }
+
+ public static void Vrhadd(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorBinaryOpI32(context, (op1, op2) =>
+ {
+ if (op.Size == 2)
+ {
+ op1 = context.ZeroExtend32(OperandType.I64, op1);
+ op2 = context.ZeroExtend32(OperandType.I64, op2);
+ }
+
+ Operand res = context.Add(context.Add(op1, op2), Const(op1.Type, 1L));
+ res = context.ShiftRightUI(res, Const(1));
+
+ if (op.Size == 2)
+ {
+ res = context.ConvertI64ToI32(res);
+ }
+
+ return res;
+ }, !op.U);
+ }
+
+ public static void Vrsqrte(ArmEmitterContext context)
+ {
+ OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp;
+
+ if (op.F)
+ {
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrsqrteV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
+ {
+ EmitVectorUnaryOpF32(context, Intrinsic.X86Rsqrtps, 0);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPRSqrtEstimateFpscr), op1);
+ });
+ }
+ }
+ else
+ {
+ throw new NotImplementedException("Integer Vrsqrte not currently implemented.");
+ }
+ }
+
+ public static void Vrsqrts(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrsqrtsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ bool single = (op.Size & 1) == 0;
+
+ // VRSQRTS step: result = (3 - (n * m)) / 2, one Newton-Raphson refinement for the reciprocal square root.
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ if (single)
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5f);
+ Operand maskThree = X86GetAllElements(context, 3f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
+ return context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);
+ }
+ else
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5d);
+ Operand maskThree = X86GetAllElements(context, 3d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
+ return context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);
+ }
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStep), op1, op2);
+ });
+ }
+ }
+
+ public static void Vsel(ArmEmitterContext context)
+ {
+ OpCode32SimdSel op = (OpCode32SimdSel)context.CurrOp;
+
+ Operand condition = default;
+
+ switch (op.Cc)
+ {
+ case OpCode32SimdSelMode.Eq:
+ condition = GetCondTrue(context, Condition.Eq);
+ break;
+ case OpCode32SimdSelMode.Ge:
+ condition = GetCondTrue(context, Condition.Ge);
+ break;
+ case OpCode32SimdSelMode.Gt:
+ condition = GetCondTrue(context, Condition.Gt);
+ break;
+ case OpCode32SimdSelMode.Vs:
+ condition = GetCondTrue(context, Condition.Vs);
+ break;
+ }
+
+ EmitScalarBinaryOpI32(context, (op1, op2) =>
+ {
+ return context.ConditionalSelect(condition, op1, op2);
+ });
+ }
+
+ public static void Vsqrt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FsqrtS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpF32(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1);
+ });
+ }
+ }
+
+ public static void Vsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Vsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FsubV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Vsub_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PsubInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Vsubl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Subtract(op1, op2), !op.U);
+ }
+
+ public static void Vsubw_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp;
+
+ EmitVectorBinaryWideOpI32(context, (op1, op2) => context.Subtract(op1, op2), !op.U);
+ }
+
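+ // Element sizes below 64 bits widen as needed, saturate with EmitSatQ, then narrow back;
+ // 64-bit elements use the dedicated saturating add/sub helpers.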
+ private static void EmitSaturatingAddSubBinaryOp(ArmEmitterContext context, bool add, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ EmitVectorBinaryOpI32(context, (ne, me) =>
+ {
+ if (op.Size <= 2)
+ {
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ Operand res = add ? context.Add(ne, me) : context.Subtract(ne, me);
+
+ res = EmitSatQ(context, res, 8 << op.Size, signedSrc: true, signed);
+
+ if (op.Size == 2)
+ {
+ res = context.ConvertI64ToI32(res);
+ }
+
+ return res;
+ }
+ else if (add) /* if (op.Size == 3) */
+ {
+ return signed
+ ? EmitBinarySignedSatQAdd(context, ne, me)
+ : EmitBinaryUnsignedSatQAdd(context, ne, me);
+ }
+ else /* if (sub) */
+ {
+ return signed
+ ? EmitBinarySignedSatQSub(context, ne, me)
+ : EmitBinaryUnsignedSatQSub(context, ne, me);
+ }
+ }, signed);
+ }
+
+ private static void EmitSse41MaxMinNumOpF32(ArmEmitterContext context, bool isMaxNum, bool scalar)
+ {
+ IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
+
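+ // FMaxNum/FMinNum: when exactly one operand is a quiet NaN, the other operand wins.
+ // Replace the NaN input with -Inf (max) or +Inf (min) so the plain SSE max/min picks the number.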
+ Func<Operand, Operand, Operand> genericEmit = (n, m) =>
+ {
+ Operand nNum = context.Copy(n);
+ Operand mNum = context.Copy(m);
+
+ InstEmit.EmitSse2VectorIsNaNOpF(context, nNum, out Operand nQNaNMask, out _, isQNaN: true);
+ InstEmit.EmitSse2VectorIsNaNOpF(context, mNum, out Operand mQNaNMask, out _, isQNaN: true);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand negInfMask = X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
+
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);
+
+ nNum = context.AddIntrinsic(Intrinsic.X86Blendvps, nNum, negInfMask, nMask);
+ mNum = context.AddIntrinsic(Intrinsic.X86Blendvps, mNum, negInfMask, mMask);
+
+ return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxps : Intrinsic.X86Minps, nNum, mNum);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand negInfMask = X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
+
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);
+
+ nNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, nNum, negInfMask, nMask);
+ mNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, mNum, negInfMask, mMask);
+
+ return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, nNum, mNum);
+ }
+ };
+
+ if (scalar)
+ {
+ EmitScalarBinaryOpSimd32(context, genericEmit);
+ }
+ else
+ {
+ EmitVectorBinaryOpSimd32(context, genericEmit);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/src/ARMeilleure/Instructions/InstEmitSimdCmp.cs
new file mode 100644
index 00000000..c32b64ba
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCmp.cs
@@ -0,0 +1,799 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
+ public static void Cmeq_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: true);
+ }
+
+ public static void Cmeq_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ m = GetVec(binOp.Rm);
+ }
+ else
+ {
+ m = context.VectorZero();
+ }
+
+ Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmge_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: true);
+ }
+
+ public static void Cmge_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ m = GetVec(binOp.Rm);
+ }
+ else
+ {
+ m = context.VectorZero();
+ }
+
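+ // n >= m computed as NOT(m > n): PCMPGT with swapped operands, then invert with PANDN.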
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, m, n);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmgt_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: true);
+ }
+
+ public static void Cmgt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ m = GetVec(binOp.Rm);
+ }
+ else
+ {
+ m = context.VectorZero();
+ }
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmhi_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: true);
+ }
+
+ public static void Cmhi_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 3)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
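+ // SSE has no unsigned compare-greater: n > m (unsigned) == NOT(max(m, n) == m).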
+ Intrinsic maxInst = X86PmaxuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, m, n);
+
+ Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];
+
+ res = context.AddIntrinsic(cmpInst, res, m);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmhs_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: true);
+ }
+
+ public static void Cmhs_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 3)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
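+ // Unsigned n >= m == (max(n, m) == n), again avoiding the missing unsigned compare.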
+ Intrinsic maxInst = X86PmaxuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, n, m);
+
+ Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];
+
+ res = context.AddIntrinsic(cmpInst, res, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmle_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: true);
+ }
+
+ public static void Cmle_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
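+ // n <= 0 == NOT(n > 0): PCMPGT against zero, then invert with PANDN.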
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, n, context.VectorZero());
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmlt_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: true);
+ }
+
+ public static void Cmlt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, context.VectorZero(), n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmtst_S(ArmEmitterContext context)
+ {
+ EmitCmtstOp(context, scalar: true);
+ }
+
+ public static void Cmtst_V(ArmEmitterContext context)
+ {
+ EmitCmtstOp(context, scalar: false);
+ }
+
+ public static void Facge_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, absolute: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: true, absolute: true);
+ }
+ }
+
+ public static void Facge_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, absolute: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: false, absolute: true);
+ }
+ }
+
+ public static void Facgt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: true, absolute: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: true, absolute: true);
+ }
+ }
+
+ public static void Facgt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: false, absolute: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: false, absolute: true);
+ }
+ }
+
+ public static void Fccmp_S(ArmEmitterContext context)
+ {
+ EmitFccmpOrFccmpe(context, signalNaNs: false);
+ }
+
+ public static void Fccmpe_S(ArmEmitterContext context)
+ {
+ EmitFccmpOrFccmpe(context, signalNaNs: true);
+ }
+
+ public static void Fcmeq_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.Equal, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: true);
+ }
+ }
+
+ public static void Fcmeq_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.Equal, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: false);
+ }
+ }
+
+ public static void Fcmge_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: true);
+ }
+ }
+
+ public static void Fcmge_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: false);
+ }
+ }
+
+ public static void Fcmgt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: true);
+ }
+ }
+
+ public static void Fcmgt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: false);
+ }
+ }
+
+ public static void Fcmle_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThanOrEqual, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: true);
+ }
+ }
+
+ public static void Fcmle_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThanOrEqual, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: false);
+ }
+ }
+
+ public static void Fcmlt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThan, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: true);
+ }
+ }
+
+ public static void Fcmlt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThan, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: false);
+ }
+ }
+
+ public static void Fcmp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: false);
+ }
+ else
+ {
+ EmitFcmpOrFcmpe(context, signalNaNs: false);
+ }
+ }
+
+ public static void Fcmpe_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: true);
+ }
+ else
+ {
+ EmitFcmpOrFcmpe(context, signalNaNs: true);
+ }
+ }
+
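+ // Conditional compare: if the condition passes, perform the real compare;
+ // otherwise load NZCV directly from the immediate encoded in the instruction.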
+ private static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
+
+ Operand lblTrue = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond));
+
+ EmitSetNzcv(context, op.Nzcv);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblTrue);
+
+ EmitFcmpOrFcmpe(context, signalNaNs);
+
+ context.MarkLabel(lblEnd);
+ }
+
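+ // Immediate NZCV layout: bit 3 = N, bit 2 = Z, bit 1 = C, bit 0 = V.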
+ private static void EmitSetNzcv(ArmEmitterContext context, int nzcv)
+ {
+ Operand Extract(int value, int bit)
+ {
+ if (bit != 0)
+ {
+ value >>= bit;
+ }
+
+ value &= 1;
+
+ return Const(value);
+ }
+
+ SetFlag(context, PState.VFlag, Extract(nzcv, 0));
+ SetFlag(context, PState.CFlag, Extract(nzcv, 1));
+ SetFlag(context, PState.ZFlag, Extract(nzcv, 2));
+ SetFlag(context, PState.NFlag, Extract(nzcv, 3));
+ }
+
+ private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ bool cmpWithZero = op is not OpCodeSimdFcond && op.Bit3;
+
+ if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);
+
+ CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;
+
+ Operand lblNaN = Label();
+ Operand lblEnd = Label();
+
+ if (op.Size == 0)
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand nCopy = context.Copy(n);
+ Operand mCopy = cmpWithZero ? context.VectorZero() : context.Copy(m);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, nCopy, mCopy);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, nCopy, mCopy);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, nCopy, mCopy);
+
+ SetFlag(context, PState.VFlag, Const(0));
+ SetFlag(context, PState.CFlag, cf);
+ SetFlag(context, PState.ZFlag, zf);
+ SetFlag(context, PState.NFlag, nf);
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand nCopy = context.Copy(n);
+ Operand mCopy = cmpWithZero ? context.VectorZero() : context.Copy(m);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, nCopy, mCopy);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, nCopy, mCopy);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, nCopy, mCopy);
+
+ SetFlag(context, PState.VFlag, Const(0));
+ SetFlag(context, PState.CFlag, cf);
+ SetFlag(context, PState.ZFlag, zf);
+ SetFlag(context, PState.NFlag, nf);
+ }
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNaN);
+
+ SetFlag(context, PState.VFlag, Const(1));
+ SetFlag(context, PState.CFlag, Const(1));
+ SetFlag(context, PState.ZFlag, Const(0));
+ SetFlag(context, PState.NFlag, Const(0));
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ OperandType type = op.Size != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand me;
+
+ if (cmpWithZero)
+ {
+ me = op.Size == 0 ? ConstF(0f) : ConstF(0d);
+ }
+ else
+ {
+ me = context.VectorExtract(type, GetVec(op.Rm), 0);
+ }
+
+ Operand nzcv = EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare), ne, me, Const(signalNaNs));
+
+ EmitSetNzcv(context, nzcv);
+ }
+ }
+
+ private static void EmitSetNzcv(ArmEmitterContext context, Operand nzcv)
+ {
+ Operand Extract(Operand value, int bit)
+ {
+ if (bit != 0)
+ {
+ value = context.ShiftRightUI(value, Const(bit));
+ }
+
+ value = context.BitwiseAnd(value, Const(1));
+
+ return value;
+ }
+
+ SetFlag(context, PState.VFlag, Extract(nzcv, 0));
+ SetFlag(context, PState.CFlag, Extract(nzcv, 1));
+ SetFlag(context, PState.ZFlag, Extract(nzcv, 2));
+ SetFlag(context, PState.NFlag, Extract(nzcv, 3));
+ }
+
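+ // Element-wise compare: each lane becomes all ones when the predicate holds, all zeros otherwise.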
+ private static void EmitCmpOp(ArmEmitterContext context, Func2I emitCmp, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size));
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ me = EmitVectorExtractSx(context, binOp.Rm, index, op.Size);
+ }
+ else
+ {
+ me = Const(0L);
+ }
+
+ Operand isTrue = emitCmp(ne, me);
+
+ Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L));
+
+ res = EmitVectorInsert(context, res, mask, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitCmtstOp(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size));
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ Operand test = context.BitwiseAnd(ne, me);
+
+ Operand isTrue = context.ICompareNotEqual(test, Const(0L));
+
+ Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L));
+
+ res = EmitVectorInsert(context, res, mask, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitCmpOpF(ArmEmitterContext context, string name, bool scalar, bool absolute = false)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = !scalar ? op.GetBytesCount() >> (sizeF + 2) : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ me = context.VectorExtract(type, GetVec(binOp.Rm), index);
+ }
+ else
+ {
+ me = sizeF == 0 ? ConstF(0f) : ConstF(0d);
+ }
+
+ if (absolute)
+ {
+ ne = EmitUnaryMathCall(context, nameof(Math.Abs), ne);
+ me = EmitUnaryMathCall(context, nameof(Math.Abs), me);
+ }
+
+ Operand e = EmitSoftFloatCall(context, name, ne, me);
+
+ res = context.VectorInsert(res, e, index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
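+ // Absolute compares (FACGE/FACGT) clear the sign bit of both inputs first by masking
+ // with all-lanes int.MaxValue / long.MaxValue.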
+ private static void EmitSse2OrAvxCmpOpF(ArmEmitterContext context, CmpCondition cond, bool scalar, bool absolute = false)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = op is OpCodeSimdReg binOp ? GetVec(binOp.Rm) : context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ if (absolute)
+ {
+ Operand mask = scalar ? X86GetScalar(context, int.MaxValue) : X86GetAllElements(context, int.MaxValue);
+
+ n = context.AddIntrinsic(Intrinsic.X86Andps, n, mask);
+ m = context.AddIntrinsic(Intrinsic.X86Andps, m, mask);
+ }
+
+ Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;
+
+ Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ if (absolute)
+ {
+ Operand mask = scalar ? X86GetScalar(context, long.MaxValue) : X86GetAllElements(context, long.MaxValue);
+
+ n = context.AddIntrinsic(Intrinsic.X86Andpd, n, mask);
+ m = context.AddIntrinsic(Intrinsic.X86Andpd, m, mask);
+ }
+
+ Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;
+
+ Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
new file mode 100644
index 00000000..a990e057
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
@@ -0,0 +1,437 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit32
+ {
+ public static void Vceq_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, false);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, false);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareEQFpscr), false);
+ }
+ }
+
+ public static void Vceq_I(ArmEmitterContext context)
+ {
+ EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, false, false);
+ }
+
+ public static void Vceq_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareEQFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, true, false);
+ }
+ }
+
+ public static void Vcge_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGEFpscr), false);
+ }
+ }
+
+ public static void Vcge_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, false, !op.U);
+ }
+
+ public static void Vcge_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGEFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, true, true);
+ }
+ }
+
+ public static void Vcgt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, false);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, false);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGTFpscr), false);
+ }
+ }
+
+ public static void Vcgt_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, false, !op.U);
+ }
+
+ public static void Vcgt_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGTFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, true, true);
+ }
+ }
+
+ public static void Vcle_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareLEFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareLessOrEqual, context.ICompareLessOrEqualUI, true, true);
+ }
+ }
+
+ public static void Vclt_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThan, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThan, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareLTFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareLess, context.ICompareLessUI, true, true);
+ }
+ }
+
+ private static void EmitCmpOpF32(ArmEmitterContext context, string name, bool zero)
+ {
+ if (zero)
+ {
+ EmitVectorUnaryOpF32(context, (m) =>
+ {
+ Operand zeroOp = m.Type == OperandType.FP64 ? ConstF(0.0d) : ConstF(0.0f);
+
+ return EmitSoftFloatCallDefaultFpscr(context, name, m, zeroOp);
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (n, m) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, name, n, m);
+ });
+ }
+ }
+
+ private static Operand ZerosOrOnes(ArmEmitterContext context, Operand fromBool, OperandType baseType)
+ {
+ var ones = (baseType == OperandType.I64) ? Const(-1L) : Const(-1);
+
+ return context.ConditionalSelect(fromBool, ones, Const(baseType, 0L));
+ }
+
+ private static void EmitCmpOpI32(
+ ArmEmitterContext context,
+ Func2I signedOp,
+ Func2I unsignedOp,
+ bool zero,
+ bool signed)
+ {
+ if (zero)
+ {
+ if (signed)
+ {
+ EmitVectorUnaryOpSx32(context, (m) =>
+ {
+ OperandType type = m.Type;
+ Operand zeroV = (type == OperandType.I64) ? Const(0L) : Const(0);
+
+ return ZerosOrOnes(context, signedOp(m, zeroV), type);
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpZx32(context, (m) =>
+ {
+ OperandType type = m.Type;
+ Operand zeroV = (type == OperandType.I64) ? Const(0L) : Const(0);
+
+ return ZerosOrOnes(context, unsignedOp(m, zeroV), type);
+ });
+ }
+ }
+ else
+ {
+ if (signed)
+ {
+ EmitVectorBinaryOpSx32(context, (n, m) => ZerosOrOnes(context, signedOp(n, m), n.Type));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (n, m) => ZerosOrOnes(context, unsignedOp(n, m), n.Type));
+ }
+ }
+ }
+
+ public static void Vcmp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, false);
+ }
+ else
+ {
+ EmitVcmpOrVcmpe(context, false);
+ }
+ }
+
+ public static void Vcmpe(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, true);
+ }
+ else
+ {
+ EmitVcmpOrVcmpe(context, true);
+ }
+ }
+
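+ // VCMP/VCMPE set the FPSCR NZCV flags. The fast path derives them from ordered COMIS-style
+ // compares; if either input is NaN it branches to a block that sets NZCV = 0011 (unordered).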
+ private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ bool cmpWithZero = (op.Opc & 2) != 0;
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
+ {
+ CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;
+
+ bool doubleSize = sizeF != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand n = GetVecA32(op.Vd >> shift);
+
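+ // Bring the addressed scalar into element 0 for the scalar compare intrinsics below.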
+ n = EmitSwapScalar(context, n, op.Vd, doubleSize);
+ m = cmpWithZero ? context.VectorZero() : EmitSwapScalar(context, m, op.Vm, doubleSize);
+
+ Operand lblNaN = Label();
+ Operand lblEnd = Label();
+
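+ // Ordered: derive the FPSCR flags directly from scalar compares (C = n >= m, Z = n == m, N = n < m, V = 0).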
+ if (!doubleSize)
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m);
+
+ SetFpFlag(context, FPState.VFlag, Const(0));
+ SetFpFlag(context, FPState.CFlag, cf);
+ SetFpFlag(context, FPState.ZFlag, zf);
+ SetFpFlag(context, FPState.NFlag, nf);
+ }
+ else
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m);
+
+ SetFpFlag(context, FPState.VFlag, Const(0));
+ SetFpFlag(context, FPState.CFlag, cf);
+ SetFpFlag(context, FPState.ZFlag, zf);
+ SetFpFlag(context, FPState.NFlag, nf);
+ }
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNaN);
+
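+ // Unordered (at least one NaN operand): NZCV = 0011.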
+ SetFpFlag(context, FPState.VFlag, Const(1));
+ SetFpFlag(context, FPState.CFlag, Const(1));
+ SetFpFlag(context, FPState.ZFlag, Const(0));
+ SetFpFlag(context, FPState.NFlag, Const(0));
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand ne = ExtractScalar(context, type, op.Vd);
+ Operand me;
+
+ if (cmpWithZero)
+ {
+ me = sizeF == 0 ? ConstF(0f) : ConstF(0d);
+ }
+ else
+ {
+ me = ExtractScalar(context, type, op.Vm);
+ }
+
+ Operand nzcv = EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare), ne, me, Const(signalNaNs));
+
+ EmitSetFpscrNzcv(context, nzcv);
+ }
+ }
+
+ private static void EmitSetFpscrNzcv(ArmEmitterContext context, Operand nzcv)
+ {
+ Operand Extract(Operand value, int bit)
+ {
+ if (bit != 0)
+ {
+ value = context.ShiftRightUI(value, Const(bit));
+ }
+
+ value = context.BitwiseAnd(value, Const(1));
+
+ return value;
+ }
+
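+ // FPCompare packs the result as NZCV, from N in bit 3 down to V in bit 0.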
+ SetFpFlag(context, FPState.VFlag, Extract(nzcv, 0));
+ SetFpFlag(context, FPState.CFlag, Extract(nzcv, 1));
+ SetFpFlag(context, FPState.ZFlag, Extract(nzcv, 2));
+ SetFpFlag(context, FPState.NFlag, Extract(nzcv, 3));
+ }
+
+ private static void EmitSse2OrAvxCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+ Intrinsic inst = (sizeF == 0) ? Intrinsic.X86Cmpps : Intrinsic.X86Cmppd;
+
+ if (zero)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(inst, m, context.VectorZero(), Const((int)cond));
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ return context.AddIntrinsic(inst, n, m, Const((int)cond));
+ });
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs b/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs
new file mode 100644
index 00000000..db24e029
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs
@@ -0,0 +1,99 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Aesd_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Decrypt)), d, n);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void Aese_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Encrypt)), d, n);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void Aesimc_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.InverseMixColumns)), n);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Aesmc_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
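+ // There is no standalone MixColumns intrinsic; chain AESDECLAST and AESENC with a zero round key so the
+ // (inverse) ShiftRows/SubBytes steps cancel out and only the MixColumns of AESENC remains.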
+ Operand roundKey = context.VectorZero();
+
+ // Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens.
+ res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);
+
+ // Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens.
+ res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.MixColumns)), n);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs
new file mode 100644
index 00000000..f713a388
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs
@@ -0,0 +1,99 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ partial class InstEmit32
+ {
+ public static void Aesd_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qm);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Decrypt)), d, n);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void Aese_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qm);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Encrypt)), d, n);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void Aesimc_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qm);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.InverseMixColumns)), n);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Aesmc_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qm);
+
+ Operand res;
+
+ if (Optimizations.UseAesni)
+ {
+ Operand roundKey = context.VectorZero();
+
+ // Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens.
+ res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);
+
+ // Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens.
+ res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.MixColumns)), n);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/src/ARMeilleure/Instructions/InstEmitSimdCvt.cs
new file mode 100644
index 00000000..652ad397
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCvt.cs
@@ -0,0 +1,1891 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+
+ static partial class InstEmit
+ {
+ public static void Fcvt_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (op.Size == 0 && op.Opc == 1) // Single -> Double.
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
+ Operand res = context.ConvertToFP(OperandType.FP64, ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else if (op.Size == 1 && op.Opc == 0) // Double -> Single.
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
+
+ Operand res = context.ConvertToFP(OperandType.FP32, ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else if (op.Size == 0 && op.Opc == 3) // Single -> Half.
+ {
+ if (Optimizations.UseF16c)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+ res = context.AddIntrinsic(Intrinsic.X86Pslldq, res, Const(14)); // VectorZeroUpper112()
+ res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(14));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
+ context.StoreToContext();
+ Operand res = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
+ context.LoadFromContext();
+
+ res = context.ZeroExtend16(OperandType.I64, res);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
+ }
+ }
+ else if (op.Size == 3 && op.Opc == 0) // Half -> Single.
+ {
+ if (Optimizations.UseF16c)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, GetVec(op.Rn));
+ res = context.VectorZeroUpper96(res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
+
+ context.StoreToContext();
+ Operand res = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
+ context.LoadFromContext();
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
+ {
+ if (Optimizations.UseF16c)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
+ res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
+
+ context.StoreToContext();
+ Operand res = context.Call(typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert)), ne);
+ context.LoadFromContext();
+
+ res = context.ZeroExtend16(OperandType.I64, res);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
+ }
+ }
+ else if (op.Size == 3 && op.Opc == 1) // Half -> Double.
+ {
+ if (Optimizations.UseF16c)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, n);
+ res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res);
+ res = context.VectorZeroUpper64(res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
+
+ context.StoreToContext();
+ Operand res = context.Call(typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert)), ne);
+ context.LoadFromContext();
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else // Invalid encoding.
+ {
+ Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}");
+ }
+ }
+
+ public static void Fcvtas_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtasGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
+ }
+ }
+
+ public static void Fcvtas_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtasS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: true);
+ }
+ }
+
+ public static void Fcvtas_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtasS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtau_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtauGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
+ }
+ }
+
+ public static void Fcvtau_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtauS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: true);
+ }
+ }
+
+ public static void Fcvtau_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtauV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: false);
+ }
+ }
+
+ public static void Fcvtl_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtlV);
+ }
+ else if (Optimizations.UseSse2 && sizeF == 1)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n;
+ res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseF16c && sizeF == 0)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n;
+ res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = 4 >> sizeF;
+
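+ // The "2" variant (Q form) reads its source elements from the upper half of the register.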
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ if (sizeF == 0)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1);
+
+ context.StoreToContext();
+ Operand e = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
+ context.LoadFromContext();
+
+ res = context.VectorInsert(res, e, index);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index);
+
+ Operand e = context.ConvertToFP(OperandType.FP64, ne);
+
+ res = context.VectorInsert(res, e, index);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fcvtms_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmsGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
+ }
+ }
+
+ public static void Fcvtms_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtmsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1), signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtmu_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmuGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
+ }
+ }
+
+ public static void Fcvtn_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpFRd(context, Intrinsic.Arm64FcvtnV);
+ }
+ else if (Optimizations.UseSse2 && sizeF == 1)
+ {
+ Operand d = GetVec(op.Rd);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, GetVec(op.Rn));
+ nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
+
+ Operand res = context.VectorZeroUpper64(d);
+ res = context.AddIntrinsic(movInst, res, nInt);
+
+ context.Copy(d, res);
+ }
+ else if (Optimizations.UseF16c && sizeF == 0)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+ nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
+
+ Operand res = context.VectorZeroUpper64(d);
+ res = context.AddIntrinsic(movInst, res, nInt);
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ int elems = 4 >> sizeF;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
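+ // The "2" variant (Q form) narrows into the upper half of Rd and must preserve the existing lower half.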
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+
+ if (sizeF == 0)
+ {
+ context.StoreToContext();
+ Operand e = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
+ context.LoadFromContext();
+
+ res = EmitVectorInsert(context, res, e, part + index, 1);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand e = context.ConvertToFP(OperandType.FP32, ne);
+
+ res = context.VectorInsert(res, e, part + index);
+ }
+ }
+
+ context.Copy(d, res);
+ }
+ }
+
+ public static void Fcvtns_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtnsGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearest, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1));
+ }
+ }
+
+ public static void Fcvtns_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnsS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: true);
+ }
+ }
+
+ public static void Fcvtns_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtnu_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnuS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: true);
+ }
+ }
+
+ public static void Fcvtnu_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnuV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: false);
+ }
+ }
+
+ public static void Fcvtps_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpsGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
+ }
+ }
+
+ public static void Fcvtpu_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpuGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
+ }
+ }
+
+ public static void Fcvtzs_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzsGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => op1);
+ }
+ }
+
+ public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzsGpFixed, op.FBits);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
+ }
+ else
+ {
+ EmitFcvtzs_Gp_Fixed(context);
+ }
+ }
+
+ public static void Fcvtzs_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzsS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: true, scalar: true);
+ }
+ }
+
+ public static void Fcvtzs_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzsVFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtzu_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzuGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => op1);
+ }
+ }
+
+ public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzuGpFixed, op.FBits);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
+ }
+ else
+ {
+ EmitFcvtzu_Gp_Fixed(context);
+ }
+ }
+
+ public static void Fcvtzu_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzuS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: false, scalar: true);
+ }
+ }
+
+ public static void Fcvtzu_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzuV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: false, scalar: false);
+ }
+ }
+
+ public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzuVFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: false, scalar: false);
+ }
+ }
+
+ public static void Scvtf_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64ScvtfGp);
+ }
+ else
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ res = context.SignExtend32(OperandType.I64, res);
+ }
+
+ res = EmitFPConvert(context, res, op.Size, signed: true);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+
+ public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64ScvtfGpFixed, op.FBits);
+ }
+ else
+ {
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ res = context.SignExtend32(OperandType.I64, res);
+ }
+
+ res = EmitFPConvert(context, res, op.Size, signed: true);
+
+ res = EmitI2fFBitsMul(context, res, op.FBits);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+
+ public static void Scvtf_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64ScvtfS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2ScvtfOp(context, scalar: true);
+ }
+ else
+ {
+ EmitCvtf(context, signed: true, scalar: true);
+ }
+ }
+
+ public static void Scvtf_S_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64ScvtfSFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2ScvtfOp(context, scalar: true);
+ }
+ else
+ {
+ EmitCvtf(context, signed: true, scalar: true);
+ }
+ }
+
+ public static void Scvtf_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64ScvtfV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2ScvtfOp(context, scalar: false);
+ }
+ else
+ {
+ EmitCvtf(context, signed: true, scalar: false);
+ }
+ }
+
+ public static void Scvtf_V_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64ScvtfVFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2ScvtfOp(context, scalar: false);
+ }
+ else
+ {
+ EmitCvtf(context, signed: true, scalar: false);
+ }
+ }
+
+ public static void Ucvtf_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64UcvtfGp);
+ }
+ else
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = EmitFPConvert(context, res, op.Size, signed: false);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+
+ public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64UcvtfGpFixed, op.FBits);
+ }
+ else
+ {
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = EmitFPConvert(context, res, op.Size, signed: false);
+
+ res = EmitI2fFBitsMul(context, res, op.FBits);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+
+ public static void Ucvtf_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64UcvtfS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2UcvtfOp(context, scalar: true);
+ }
+ else
+ {
+ EmitCvtf(context, signed: false, scalar: true);
+ }
+ }
+
+ public static void Ucvtf_S_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64UcvtfSFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2UcvtfOp(context, scalar: true);
+ }
+ else
+ {
+ EmitCvtf(context, signed: false, scalar: true);
+ }
+ }
+
+ public static void Ucvtf_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64UcvtfV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2UcvtfOp(context, scalar: false);
+ }
+ else
+ {
+ EmitCvtf(context, signed: false, scalar: false);
+ }
+ }
+
+ public static void Ucvtf_V_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64UcvtfVFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2UcvtfOp(context, scalar: false);
+ }
+ else
+ {
+ EmitCvtf(context, signed: false, scalar: false);
+ }
+ }
+
+ private static void EmitFcvt(ArmEmitterContext context, Func1I emit, bool signed, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand n = GetVec(op.Rn);
+
+ int sizeF = op.Size & 1;
+ int sizeI = sizeF + 2;
+
+ OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, n, index);
+
+ Operand e = emit(ne);
+
+ if (sizeF == 0)
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
+
+ e = context.Call(info, e);
+
+ e = context.ZeroExtend32(OperandType.I64, e);
+ }
+ else /* if (sizeF == 1) */
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
+
+ e = context.Call(info, e);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, sizeI);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand n = GetVec(op.Rn);
+
+ int sizeF = op.Size & 1;
+ int sizeI = sizeF + 2;
+
+ OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ int fBits = GetFBits(context);
+
+ int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, n, index);
+
+ Operand e = EmitF2iFBitsMul(context, ne, fBits);
+
+ if (sizeF == 0)
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
+
+ e = context.Call(info, e);
+
+ e = context.ZeroExtend32(OperandType.I64, e);
+ }
+ else /* if (sizeF == 1) */
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
+
+ e = context.Call(info, e);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, sizeI);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit)
+ {
+ EmitFcvt___Gp(context, emit, signed: true);
+ }
+
+ private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit)
+ {
+ EmitFcvt___Gp(context, emit, signed: false);
+ }
+
+ private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ Operand res = signed
+ ? EmitScalarFcvts(context, emit(ne), 0)
+ : EmitScalarFcvtu(context, emit(ne), 0);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context)
+ {
+ EmitFcvtz__Gp_Fixed(context, signed: true);
+ }
+
+ private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context)
+ {
+ EmitFcvtz__Gp_Fixed(context, signed: false);
+ }
+
+ private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ Operand res = signed
+ ? EmitScalarFcvts(context, ne, op.FBits)
+ : EmitScalarFcvtu(context, ne, op.FBits);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitCvtf(ArmEmitterContext context, bool signed, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+ int sizeI = sizeF + 2;
+
+ int fBits = GetFBits(context);
+
+ int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorLongExtract(context, op.Rn, index, sizeI);
+
+ Operand e = EmitFPConvert(context, ne, sizeF, signed);
+
+ e = EmitI2fFBitsMul(context, e, fBits);
+
+ res = context.VectorInsert(res, e, index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static int GetFBits(ArmEmitterContext context)
+ {
+ if (context.CurrOp is OpCodeSimdShImm op)
+ {
+ return GetImmShr(op);
+ }
+
+ return 0;
+ }
+
+ private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed)
+ {
+ Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
+ Debug.Assert((uint)size < 2);
+
+ OperandType type = size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ if (signed)
+ {
+ return context.ConvertToFP(type, value);
+ }
+ else
+ {
+ return context.ConvertToFPUI(type, value);
+ }
+ }
+
+ private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ value = EmitF2iFBitsMul(context, value, fBits);
+
+ MethodInfo info;
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ info = value.Type == OperandType.FP32
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
+ }
+ else
+ {
+ info = value.Type == OperandType.FP32
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64));
+ }
+
+ return context.Call(info, value);
+ }
+
+ private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ value = EmitF2iFBitsMul(context, value, fBits);
+
+ MethodInfo info;
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ info = value.Type == OperandType.FP32
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32));
+ }
+ else
+ {
+ info = value.Type == OperandType.FP32
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
+ }
+
+ return context.Call(info, value);
+ }
+
+ private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ if (fBits == 0)
+ {
+ return value;
+ }
+
+ if (value.Type == OperandType.FP32)
+ {
+ return context.Multiply(value, ConstF(MathF.Pow(2f, fBits)));
+ }
+ else /* if (value.Type == OperandType.FP64) */
+ {
+ return context.Multiply(value, ConstF(Math.Pow(2d, fBits)));
+ }
+ }
+
+ private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ if (fBits == 0)
+ {
+ return value;
+ }
+
+ if (value.Type == OperandType.FP32)
+ {
+ return context.Multiply(value, ConstF(1f / MathF.Pow(2f, fBits)));
+ }
+ else /* if (value.Type == OperandType.FP64) */
+ {
+ return context.Multiply(value, ConstF(1d / Math.Pow(2d, fBits)));
+ }
+ }
+
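+ // SSE2 has no packed double-to-int64 conversion (CVTPD2QQ requires AVX-512DQ), so each lane goes through the scalar CVTSD2SI.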
+ public static Operand EmitSse2CvtDoubleToInt64OpF(ArmEmitterContext context, Operand opF, bool scalar)
+ {
+ Debug.Assert(opF.Type == OperandType.V128);
+
+ Operand longL = context.AddIntrinsicLong (Intrinsic.X86Cvtsd2si, opF); // opFL
+ Operand res = context.VectorCreateScalar(longL);
+
+ if (!scalar)
+ {
+ Operand opFH = context.AddIntrinsic (Intrinsic.X86Movhlps, res, opF); // res doesn't matter.
+ Operand longH = context.AddIntrinsicLong (Intrinsic.X86Cvtsd2si, opFH);
+ Operand resH = context.VectorCreateScalar(longH);
+ res = context.AddIntrinsic (Intrinsic.X86Movlhps, res, resH);
+ }
+
+ return res;
+ }
+
+ private static Operand EmitSse2CvtInt64ToDoubleOp(ArmEmitterContext context, Operand op, bool scalar)
+ {
+ Debug.Assert(op.Type == OperandType.V128);
+
+ Operand longL = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, op); // opL
+ Operand res = context.AddIntrinsic (Intrinsic.X86Cvtsi2sd, context.VectorZero(), longL);
+
+ if (!scalar)
+ {
+ Operand opH = context.AddIntrinsic (Intrinsic.X86Movhlps, res, op); // res doesn't matter.
+ Operand longH = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, opH);
+ Operand resH = context.AddIntrinsic (Intrinsic.X86Cvtsi2sd, res, longH); // res doesn't matter.
+ res = context.AddIntrinsic (Intrinsic.X86Movlhps, res, resH);
+ }
+
+ return res;
+ }
+
+ private static void EmitSse2ScvtfOp(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
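+ // (each 0x800000 step moves the single-precision exponent field by one, building 2^-fBits directly in integer form)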
+ int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = EmitSse2CvtInt64ToDoubleOp(context, n, scalar);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitSse2UcvtfOp(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
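+ // CVTDQ2PS is a signed conversion; split each unsigned 32-bit lane into 16-bit halves,
+ // convert both exactly, then recombine as high * 2^16 + low.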
+ Operand mask = scalar // 65536.000f (1 << 16)
+ ? X86GetScalar (context, 0x47800000)
+ : X86GetAllElements(context, 0x47800000);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
+ res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
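+ // Same idea for 64-bit lanes: convert the 32-bit halves separately and recombine as high * 2^32 + low.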
+ Operand mask = scalar // 4294967296.0000000d (1L << 32)
+ ? X86GetScalar (context, 0x41F0000000000000L)
+ : X86GetAllElements(context, 0x41F0000000000000L);
+
+ Operand res = context.AddIntrinsic (Intrinsic.X86Psrlq, n, Const(32));
+ res = EmitSse2CvtInt64ToDoubleOp(context, res, scalar);
+ res = context.AddIntrinsic (Intrinsic.X86Mulpd, res, mask);
+
+ Operand res2 = context.AddIntrinsic (Intrinsic.X86Psllq, n, Const(32));
+ res2 = context.AddIntrinsic (Intrinsic.X86Psrlq, res2, Const(32));
+ res2 = EmitSse2CvtInt64ToDoubleOp(context, res2, scalar);
+
+ res = context.AddIntrinsic(Intrinsic.X86Addpd, res, res2);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitSse41FcvtsOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
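+ // Zero out NaN lanes first: Arm FCVT* to integer returns 0 for NaN, while the x86 conversion would return the integer indefinite value.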
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+
+ Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
+ ? X86GetScalar (context, 0x4F000000)
+ : X86GetAllElements(context, 0x4F000000);
+
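+ // On overflow CVTPS2DQ returns 0x80000000; XOR with the NotLessThan mask flips that to 0x7FFFFFFF (positive saturation).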
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
+
+ if (scalar)
+ {
+ dRes = context.VectorZeroUpper96(dRes);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ dRes = context.VectorZeroUpper64(dRes);
+ }
+
+ context.Copy(GetVec(op.Rd), dRes);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
+
+ Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
+
+ Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
+ ? X86GetScalar (context, 0x43E0000000000000L)
+ : X86GetAllElements(context, 0x43E0000000000000L);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
+
+ if (scalar)
+ {
+ dRes = context.VectorZeroUpper64(dRes);
+ }
+
+ context.Copy(GetVec(op.Rd), dRes);
+ }
+ }
+
+ private static void EmitSse41FcvtuOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
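+ // SSE has no unsigned conversion: clamp negatives to zero, convert both x and x - 2^31 as signed,
+ // then add the results; converting x >= 2^31 yields the 0x80000000 pattern, which supplies the missing 2^31.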
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
+ ? X86GetScalar (context, 0x4F000000)
+ : X86GetAllElements(context, 0x4F000000);
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
+ dRes = context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
+
+ if (scalar)
+ {
+ dRes = context.VectorZeroUpper96(dRes);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ dRes = context.VectorZeroUpper64(dRes);
+ }
+
+ context.Copy(GetVec(op.Rd), dRes);
+ }
+ else /* if (sizeF == 1) */
+ {
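+ // Same trick with 2^63 for the 64-bit lanes.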
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar (context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
+ ? X86GetScalar (context, 0x43E0000000000000L)
+ : X86GetAllElements(context, 0x43E0000000000000L);
+
+ Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand nLong2 = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
+ dRes = context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
+
+ if (scalar)
+ {
+ dRes = context.VectorZeroUpper64(dRes);
+ }
+
+ context.Copy(GetVec(op.Rd), dRes);
+ }
+ }
+
+ private static void EmitSse41Fcvts_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size == 0)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (isFixed)
+ {
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
+ int fpScaled = 0x3F800000 + op.FBits * 0x800000;
+
+ Operand fpScaledMask = X86GetScalar(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
+
+ int fpMaxVal = op.RegisterSize == RegisterSize.Int32
+ ? 0x4F000000 // 2.14748365E9f (2147483648)
+ : 0x5F000000; // 9.223372E18f (9223372036854775808)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
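+ // XOR the overflow mask into the converted value: this flips x86's sign-bit-only indefinite result into positive saturation.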
+ Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
+
+ if (op.RegisterSize == RegisterSize.Int64)
+ {
+ nInt = context.SignExtend32(OperandType.I64, nInt);
+ }
+
+ Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
+
+ SetIntOrZR(context, op.Rd, dRes);
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (isFixed)
+ {
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
+ long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
+
+ Operand fpScaledMask = X86GetScalar(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
+
+ long fpMaxVal = op.RegisterSize == RegisterSize.Int32
+ ? 0x41E0000000000000L // 2147483648.0000000d (2147483648)
+ : 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ nLong = context.ConvertI64ToI32(nLong);
+ }
+
+ Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
+
+ SetIntOrZR(context, op.Rd, dRes);
+ }
+ }
+
+ private static void EmitSse41Fcvtu_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size == 0)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (isFixed)
+ {
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
+ int fpScaled = 0x3F800000 + op.FBits * 0x800000;
+
+ Operand fpScaledMask = X86GetScalar(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ int fpMaxVal = op.RegisterSize == RegisterSize.Int32
+ ? 0x4F000000 // 2.14748365E9f (2147483648)
+ : 0x5F000000; // 9.223372E18f (9223372036854775808)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
+
+ if (op.RegisterSize == RegisterSize.Int64)
+ {
+ nInt = context.SignExtend32(OperandType.I64, nInt);
+ }
+
+ Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
+ dRes = context.Add(dRes, nIntOrLong);
+
+ SetIntOrZR(context, op.Rd, dRes);
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (isFixed)
+ {
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
+ long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
+
+ Operand fpScaledMask = X86GetScalar(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ long fpMaxVal = op.RegisterSize == RegisterSize.Int32
+ ? 0x41E0000000000000L // 2147483648.0000000d (2147483648)
+ : 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ nLong = context.ConvertI64ToI32(nLong);
+ }
+
+ Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
+ dRes = context.Add(dRes, nIntOrLong);
+
+ SetIntOrZR(context, op.Rd, dRes);
+ }
+ }
+
+ private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size)
+ {
+ OperandType type = size == 3 ? OperandType.I64 : OperandType.I32;
+
+ return context.VectorExtract(type, GetVec(reg), index);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
new file mode 100644
index 00000000..33ae83df
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@@ -0,0 +1,800 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ private static int FlipVdBits(int vd, bool lowBit)
+ {
+ if (lowBit)
+ {
+ // Move the low bit to the top.
+ return ((vd & 0x1) << 4) | (vd >> 1);
+ }
+ else
+ {
+ // Move the high bit to the bottom.
+ return ((vd & 0xf) << 1) | (vd >> 4);
+ }
+ }
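
An illustrative check of the shuffle, for a raw field value of 19 (0b10011):

    int s = FlipVdBits(19, lowBit: true);  // ((1) << 4) | (19 >> 1) == 25 -> S25
    int d = FlipVdBits(19, lowBit: false); // ((3) << 1) | (19 >> 4) == 7  -> D7

This matches the A32 register encoding, where single-precision registers carry
their low bit in a separate field position and double-precision registers carry
their high bit there.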
+
+ private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned)
+ {
+ MethodInfo info;
+
+ if (op1.Type == OperandType.FP64)
+ {
+ info = unsigned
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
+ }
+ else
+ {
+ info = unsigned
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32));
+ }
+
+ return context.Call(info, op1);
+ }
+
+ public static void Vcvt_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ bool unsigned = (op.Opc & 1) != 0;
+ bool toInteger = (op.Opc & 2) != 0;
+ OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64;
+
+ if (toInteger)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) =>
+ {
+ return EmitSaturateFloatToInt(context, op1, unsigned);
+ });
+ }
+ }
+ else
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (n) =>
+ {
+ if (unsigned)
+ {
+ Operand mask = X86GetAllElements(context, 0x47800000);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
+ res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
+
+ return context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
+ }
+ });
+ }
+ else
+ {
+ if (unsigned)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false));
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true));
+ }
+ }
+ }
+ }
+
+ public static void Vcvt_FD(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ int vm = op.Vm;
+ int vd;
+ if (op.Size == 3)
+ {
+ vd = FlipVdBits(op.Vd, false);
+ // Double to single.
+ Operand fp = ExtractScalar(context, OperandType.FP64, vm);
+
+ Operand res = context.ConvertToFP(OperandType.FP32, fp);
+
+ InsertScalar(context, vd, res);
+ }
+ else
+ {
+ vd = FlipVdBits(op.Vd, true);
+ // Single to double.
+ Operand fp = ExtractScalar(context, OperandType.FP32, vm);
+
+ Operand res = context.ConvertToFP(OperandType.FP64, fp);
+
+ InsertScalar(context, vd, res);
+ }
+ }
+
+ // VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point).
+ public static void Vcvt_FI(ArmEmitterContext context)
+ {
+ OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
+
+ bool toInteger = (op.Opc2 & 0b100) != 0;
+
+ OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
+
+ if (toInteger)
+ {
+ bool unsigned = (op.Opc2 & 1) == 0;
+ bool roundWithFpscr = op.Opc != 1;
+
+ if (!roundWithFpscr && Optimizations.UseAdvSimd)
+ {
+ bool doubleSize = floatSize == OperandType.FP64;
+
+ if (doubleSize)
+ {
+ Operand m = GetVecA32(op.Vm >> 1);
+
+ Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+ Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble;
+
+ Operand asInteger = context.AddIntrinsicInt(inst, toConvert);
+
+ InsertScalar(context, op.Vd, asInteger);
+ }
+ else
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS);
+ }
+ }
+ else if (!roundWithFpscr && Optimizations.UseSse41)
+ {
+ EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
+ }
+ else
+ {
+ Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+
+ // TODO: Fast Path.
+ if (roundWithFpscr)
+ {
+ toConvert = EmitRoundByRMode(context, toConvert);
+ }
+
+ // Round towards zero.
+ Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
+
+ InsertScalar(context, op.Vd, asInteger);
+ }
+ }
+ else
+ {
+ bool unsigned = op.Opc == 0;
+
+ Operand toConvert = ExtractScalar(context, OperandType.I32, op.Vm);
+
+ Operand asFloat = EmitFPConvert(context, toConvert, floatSize, !unsigned);
+
+ InsertScalar(context, op.Vd, asFloat);
+ }
+ }
+
+ private static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
+ {
+ IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
+
+ string name = nameof(Math.Round);
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
+ : typeof(Math). GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });
+
+ return context.Call(info, n, Const((int)roundMode));
+ }
+
+ private static FPRoundingMode RMToRoundMode(int rm)
+ {
+ FPRoundingMode roundMode;
+ switch (rm)
+ {
+ case 0b00:
+ roundMode = FPRoundingMode.ToNearestAway;
+ break;
+ case 0b01:
+ roundMode = FPRoundingMode.ToNearest;
+ break;
+ case 0b10:
+ roundMode = FPRoundingMode.TowardsPlusInfinity;
+ break;
+ case 0b11:
+ roundMode = FPRoundingMode.TowardsMinusInfinity;
+ break;
+ default:
+ throw new ArgumentOutOfRangeException(nameof(rm));
+ }
+ return roundMode;
+ }
+
+ // VCVTA/M/N/P (floating-point).
+ public static void Vcvt_RM(ArmEmitterContext context)
+ {
+ OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1).
+
+ OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
+
+ bool unsigned = op.Opc == 0;
+ int rm = op.Opc2 & 3;
+
+ Intrinsic inst;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ if (unsigned)
+ {
+ inst = rm switch {
+ 0b00 => Intrinsic.Arm64FcvtauS,
+ 0b01 => Intrinsic.Arm64FcvtnuS,
+ 0b10 => Intrinsic.Arm64FcvtpuS,
+ 0b11 => Intrinsic.Arm64FcvtmuS,
+ _ => throw new ArgumentOutOfRangeException(nameof(rm))
+ };
+ }
+ else
+ {
+ inst = rm switch {
+ 0b00 => Intrinsic.Arm64FcvtasS,
+ 0b01 => Intrinsic.Arm64FcvtnsS,
+ 0b10 => Intrinsic.Arm64FcvtpsS,
+ 0b11 => Intrinsic.Arm64FcvtmsS,
+ _ => throw new ArgumentOutOfRangeException(nameof(rm))
+ };
+ }
+
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
+ }
+ else
+ {
+ Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+
+ switch (rm)
+ {
+ case 0b00: // Away
+ toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
+ break;
+ case 0b01: // Nearest
+ toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
+ break;
+ case 0b10: // Towards positive infinity
+ toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
+ break;
+ case 0b11: // Towards negative infinity
+ toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
+ break;
+ }
+
+ Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
+
+ InsertScalar(context, op.Vd, asInteger);
+ }
+ }
+
+ public static void Vcvt_TB(ArmEmitterContext context)
+ {
+ OpCode32SimdCvtTB op = (OpCode32SimdCvtTB)context.CurrOp;
+
+ if (Optimizations.UseF16c)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ if (op.Op)
+ {
+ Operand res = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm);
+ if (op.Size == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), res);
+ }
+ res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+ res = context.VectorExtract16(res, 0);
+ InsertScalar16(context, op.Vd, op.T, res);
+ }
+ else
+ {
+ Operand res = context.VectorCreateScalar(ExtractScalar16(context, op.Vm, op.T));
+ res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res);
+ if (op.Size == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res);
+ }
+ res = context.VectorExtract(op.Size == 1 ? OperandType.I64 : OperandType.I32, res, 0);
+ InsertScalar(context, op.Vd, res);
+ }
+ }
+ else
+ {
+ if (op.Op)
+ {
+ // Convert to half.
+
+ Operand src = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm);
+
+ MethodInfo method = op.Size == 1
+ ? typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert))
+ : typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert));
+
+ context.ExitArmFpMode();
+ context.StoreToContext();
+ Operand res = context.Call(method, src);
+ context.LoadFromContext();
+ context.EnterArmFpMode();
+
+ InsertScalar16(context, op.Vd, op.T, res);
+ }
+ else
+ {
+ // Convert from half.
+
+ Operand src = ExtractScalar16(context, op.Vm, op.T);
+
+ MethodInfo method = op.Size == 1
+ ? typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert))
+ : typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert));
+
+ context.ExitArmFpMode();
+ context.StoreToContext();
+ Operand res = context.Call(method, src);
+ context.LoadFromContext();
+ context.EnterArmFpMode();
+
+ InsertScalar(context, op.Vd, res);
+ }
+ }
+ }
+
+ // VRINTA/M/N/P (floating-point).
+ public static void Vrint_RM(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
+
+ int rm = op.Opc2 & 3;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ Intrinsic inst = rm switch {
+ 0b00 => Intrinsic.Arm64FrintaS,
+ 0b01 => Intrinsic.Arm64FrintnS,
+ 0b10 => Intrinsic.Arm64FrintpS,
+ 0b11 => Intrinsic.Arm64FrintmS,
+ _ => throw new ArgumentOutOfRangeException(nameof(rm))
+ };
+
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ FPRoundingMode roundMode = RMToRoundMode(rm);
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
+ return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: true);
+ }
+ });
+ }
+ else
+ {
+ Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+
+ switch (rm)
+ {
+ case 0b00: // Away
+ toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
+ break;
+ case 0b01: // Nearest
+ toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
+ break;
+ case 0b10: // Towards positive infinity
+ toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
+ break;
+ case 0b11: // Towards negative infinity
+ toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
+ break;
+ }
+
+ InsertScalar(context, op.Vd, toConvert);
+ }
+ }
+
+ // VRINTA (vector).
+ public static void Vrinta_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaS);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));
+ }
+ }
+
+ // VRINTM (vector).
+ public static void Vrintm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsMinusInfinity)));
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Floor), m));
+ }
+ }
+
+ // VRINTN (vector).
+ public static void Vrintn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.ToEven, m));
+ }
+ }
+
+ // VRINTP (vector).
+ public static void Vrintp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsPlusInfinity)));
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Ceiling), m));
+ }
+ }
+
+ // VRINTZ (floating-point).
+ public static void Vrint_Z(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
+ return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero)));
+ });
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Truncate), op1));
+ }
+ }
+
+ // VRINTX (floating-point).
+ public static void Vrintx_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintxS);
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
+ {
+ Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
+
+ if (signed)
+ {
+ return context.ConvertToFP(type, value);
+ }
+ else
+ {
+ return context.ConvertToFPUI(type, value);
+ }
+ }
+
+ private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
+ {
+ // A port of the similar round function in InstEmitSimdCvt.
+ OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vm >> shift);
+ n = EmitSwapScalar(context, n, op.Vm, doubleSize);
+
+ if (!doubleSize)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp;
+ Operand nIntOrLong2 = default;
+
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
+
+ Operand dRes;
+ if (signed)
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
+ }
+ else
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
+ dRes = context.Add(dRes, nIntOrLong);
+ }
+
+ InsertScalar(context, op.Vd, dRes);
+ }
+ else
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp;
+ Operand nIntOrLong2 = default;
+
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
+ nLong = context.ConvertI64ToI32(nLong);
+
+ Operand dRes;
+ if (signed)
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
+ }
+ else
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
+ dRes = context.Add(dRes, nIntOrLong);
+ }
+
+ InsertScalar(context, op.Vd, dRes);
+ }
+ }
+
+ private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ EmitVectorUnaryOpSimd32(context, (n) =>
+ {
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+
+ Operand zero = context.VectorZero();
+ Operand nCmp;
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+ Operand nInt2 = default;
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ if (signed)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
+ }
+ else
+ {
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
+ return context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+
+ Operand zero = context.VectorZero();
+ Operand nCmp;
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
+
+ Operand nLong = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
+ Operand nLong2 = default;
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nLong2 = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ if (signed)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
+ }
+ else
+ {
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
+ return context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
+ }
+ }
+ });
+ }
+ }
+}
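
The unsigned paths above (EmitSse41Fcvtu_Gp in InstEmitSimdCvt and the !signed
branches of EmitSse41ConvertInt32 and EmitSse41ConvertVector32 here) work
around the lack of an unsigned float-to-integer convert in SSE by converting
twice: once directly, covering [0, 2^31), and once after subtracting 2^31,
covering [2^31, 2^32), then recombining the halves with an exclusive-or and an
add. A minimal scalar sketch of the 32-bit single-precision case, under the
same x86 assumptions (not code from this commit):

    static uint SatF32ToU32Model(float value)
    {
        const float Two31 = 2147483648f;

        float n = float.IsNaN(value) ? 0f : value;   // Cmpss OrderedQ + Pand
        n = MathF.Round(n, MidpointRounding.ToEven); // Roundss (mode simplified)
        if (!(n > 0f)) n = 0f;                       // Cmpss NotLessThanOrEqual(0) + Pand

        // First Cvtss2si covers [0, 2^31); past that it yields int.MinValue.
        int low = n >= Two31 ? int.MinValue : (int)n;

        // Subss 2^31, clamp again, then a second Cvtss2si covers [2^31, 2^32).
        float shifted = n - Two31;
        if (!(shifted > 0f)) shifted = 0f;
        int high = shifted >= Two31 ? int.MinValue : (int)shifted;

        // The NotLessThan mask is all-ones only for inputs of 2^32 or more,
        // so the Xor turns int.MinValue into int.MaxValue and the final add
        // saturates the result to uint.MaxValue.
        int overflowMask = shifted >= Two31 ? -1 : 0;

        return (uint)unchecked((high ^ overflowMask) + low);
    }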
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHash.cs b/src/ARMeilleure/Instructions/InstEmitSimdHash.cs
new file mode 100644
index 00000000..4fb048ee
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHash.cs
@@ -0,0 +1,147 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+#region "Sha1"
+ public static void Sha1c_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashChoose)), d, ne, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha1h_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.FixedRotate)), ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorCreateScalar(res));
+ }
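
SHA1H itself is only a fixed rotate of the working value; the
SoftFallback.FixedRotate call above amounts to (a one-line sketch, assuming the
architectural ROL #30 definition):

    static uint FixedRotateModel(uint hashE) => (hashE << 30) | (hashE >> 2);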
+
+ public static void Sha1m_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashMajority)), d, ne, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha1p_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashParity)), d, ne, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha1su0_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart1)), d, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha1su1_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart2)), d, n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+#endregion
+
+#region "Sha256"
+ public static void Sha256h_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha256h2_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha256su0_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha256su1_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+#endregion
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHash32.cs b/src/ARMeilleure/Instructions/InstEmitSimdHash32.cs
new file mode 100644
index 00000000..51334608
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHash32.cs
@@ -0,0 +1,64 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+#region "Sha256"
+ public static void Sha256h_V(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Sha256h2_V(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Sha256su0_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, m);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Sha256su1_V(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+#endregion
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs b/src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs
new file mode 100644
index 00000000..23e4948d
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs
@@ -0,0 +1,56 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitSimdHashHelper
+ {
+ public static Operand EmitSha256h(ArmEmitterContext context, Operand x, Operand y, Operand w, bool part2)
+ {
+ if (Optimizations.UseSha)
+ {
+ Operand src1 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0xbb));
+ Operand src2 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0x11));
+ Operand w2 = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, w, w);
+
+ Operand round2 = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, src1, src2, w);
+ Operand round4 = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, src2, round2, w2);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, round4, round2, Const(part2 ? 0x11 : 0xbb));
+
+ return res;
+ }
+
+            string method = part2 ? nameof(SoftFallback.HashUpper) : nameof(SoftFallback.HashLower);
+ return context.Call(typeof(SoftFallback).GetMethod(method), x, y, w);
+ }
+
+ public static Operand EmitSha256su0(ArmEmitterContext context, Operand x, Operand y)
+ {
+ if (Optimizations.UseSha)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Sha256Msg1, x, y);
+ }
+
+ return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), x, y);
+ }
+
+ public static Operand EmitSha256su1(ArmEmitterContext context, Operand x, Operand y, Operand z)
+ {
+ if (Optimizations.UseSha && Optimizations.UseSsse3)
+ {
+ Operand extr = context.AddIntrinsic(Intrinsic.X86Palignr, z, y, Const(4));
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Paddd, extr, x);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Sha256Msg2, tmp, z);
+
+ return res;
+ }
+
+ return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), x, y, z);
+ }
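
For reference, the schedule part 1 step computed by both X86Sha256Msg1 and the
SoftFallback path is, per 32-bit lane, w[i] + sigma0(w[i + 1]), with w[4] taken
from the second vector. A scalar sketch of the FIPS 180-4 small sigma-0 (an
assumption from the standard, not code from this commit):

    static uint Sigma0(uint w)
    {
        // ROTR^7(w) xor ROTR^18(w) xor SHR^3(w).
        return System.Numerics.BitOperations.RotateRight(w, 7)
             ^ System.Numerics.BitOperations.RotateRight(w, 18)
             ^ (w >> 3);
    }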
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs
new file mode 100644
index 00000000..c44c9b4d
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -0,0 +1,2088 @@
+using ARMeilleure.CodeGen.X86;
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+ using Func2I = Func<Operand, Operand, Operand>;
+ using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+ static class InstEmitSimdHelper
+ {
+#region "Masks"
+ public static readonly long[] EvenMasks = new long[]
+ {
+ 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, // B
+ 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, // H
+ 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 // S
+ };
+
+ public static readonly long[] OddMasks = new long[]
+ {
+ 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, // B
+ 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, // H
+ 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0 // S
+ };
+
+ public static readonly long ZeroMask = 128L << 56 | 128L << 48 | 128L << 40 | 128L << 32 | 128L << 24 | 128L << 16 | 128L << 8 | 128L << 0;
+
+ public static ulong X86GetGf2p8LogicalShiftLeft(int shift)
+ {
+ ulong identity = (0b00000001UL << 56) | (0b00000010UL << 48) | (0b00000100UL << 40) | (0b00001000UL << 32) |
+ (0b00010000UL << 24) | (0b00100000UL << 16) | (0b01000000UL << 8) | (0b10000000UL << 0);
+
+ return shift >= 0 ? identity >> (shift * 8) : identity << (-shift * 8);
+ }
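
The identity qword here is the 8x8 bit matrix for GF2P8AFFINEQB, one row per
byte with the most significant row first; shifting it by whole bytes removes
rows, which the affine instruction then applies as the same logical shift to
every byte of its source (a reading of the instruction's semantics, not
something this commit spells out). Illustrative values:

    ulong id  = X86GetGf2p8LogicalShiftLeft(0); // 0x0102040810204080, identity
    ulong sh1 = X86GetGf2p8LogicalShiftLeft(1); // 0x0001020408102040, byte << 1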
+#endregion
+
+#region "X86 SSE Intrinsics"
+ public static readonly Intrinsic[] X86PaddInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Paddb,
+ Intrinsic.X86Paddw,
+ Intrinsic.X86Paddd,
+ Intrinsic.X86Paddq
+ };
+
+ public static readonly Intrinsic[] X86PcmpeqInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pcmpeqb,
+ Intrinsic.X86Pcmpeqw,
+ Intrinsic.X86Pcmpeqd,
+ Intrinsic.X86Pcmpeqq
+ };
+
+ public static readonly Intrinsic[] X86PcmpgtInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pcmpgtb,
+ Intrinsic.X86Pcmpgtw,
+ Intrinsic.X86Pcmpgtd,
+ Intrinsic.X86Pcmpgtq
+ };
+
+ public static readonly Intrinsic[] X86PmaxsInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmaxsb,
+ Intrinsic.X86Pmaxsw,
+ Intrinsic.X86Pmaxsd
+ };
+
+ public static readonly Intrinsic[] X86PmaxuInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmaxub,
+ Intrinsic.X86Pmaxuw,
+ Intrinsic.X86Pmaxud
+ };
+
+ public static readonly Intrinsic[] X86PminsInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pminsb,
+ Intrinsic.X86Pminsw,
+ Intrinsic.X86Pminsd
+ };
+
+ public static readonly Intrinsic[] X86PminuInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pminub,
+ Intrinsic.X86Pminuw,
+ Intrinsic.X86Pminud
+ };
+
+ public static readonly Intrinsic[] X86PmovsxInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmovsxbw,
+ Intrinsic.X86Pmovsxwd,
+ Intrinsic.X86Pmovsxdq
+ };
+
+ public static readonly Intrinsic[] X86PmovzxInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmovzxbw,
+ Intrinsic.X86Pmovzxwd,
+ Intrinsic.X86Pmovzxdq
+ };
+
+ public static readonly Intrinsic[] X86PsllInstruction = new Intrinsic[]
+ {
+ 0,
+ Intrinsic.X86Psllw,
+ Intrinsic.X86Pslld,
+ Intrinsic.X86Psllq
+ };
+
+ public static readonly Intrinsic[] X86PsraInstruction = new Intrinsic[]
+ {
+ 0,
+ Intrinsic.X86Psraw,
+ Intrinsic.X86Psrad
+ };
+
+ public static readonly Intrinsic[] X86PsrlInstruction = new Intrinsic[]
+ {
+ 0,
+ Intrinsic.X86Psrlw,
+ Intrinsic.X86Psrld,
+ Intrinsic.X86Psrlq
+ };
+
+ public static readonly Intrinsic[] X86PsubInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Psubb,
+ Intrinsic.X86Psubw,
+ Intrinsic.X86Psubd,
+ Intrinsic.X86Psubq
+ };
+
+ public static readonly Intrinsic[] X86PunpckhInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Punpckhbw,
+ Intrinsic.X86Punpckhwd,
+ Intrinsic.X86Punpckhdq,
+ Intrinsic.X86Punpckhqdq
+ };
+
+ public static readonly Intrinsic[] X86PunpcklInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Punpcklbw,
+ Intrinsic.X86Punpcklwd,
+ Intrinsic.X86Punpckldq,
+ Intrinsic.X86Punpcklqdq
+ };
+#endregion
+
+ public static void EnterArmFpMode(EmitterContext context, Func<FPState, Operand> getFpFlag)
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
+
+ Operand fzTrue = getFpFlag(FPState.FzFlag);
+ Operand r0True = getFpFlag(FPState.RMode0Flag);
+ Operand r1True = getFpFlag(FPState.RMode1Flag);
+
+ mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
+
+ mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(fzTrue, Const((int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Um | Mxcsr.Dm)), Const(0)));
+
+ // X86 round modes in order: nearest, negative, positive, zero
+ // ARM round modes in order: nearest, positive, negative, zero
+ // Read the bits backwards to correct this.
+
+ mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r0True, Const((int)Mxcsr.Rhi), Const(0)));
+ mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r1True, Const((int)Mxcsr.Rlo), Const(0)));
+
+ context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
+ }
+ else if (Optimizations.UseAdvSimd)
+ {
+ Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
+
+ Operand fzTrue = getFpFlag(FPState.FzFlag);
+ Operand r0True = getFpFlag(FPState.RMode0Flag);
+ Operand r1True = getFpFlag(FPState.RMode1Flag);
+
+ fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
+
+ fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(fzTrue, Const((int)FPCR.Fz), Const(0)));
+ fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r0True, Const((int)FPCR.RMode0), Const(0)));
+ fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r1True, Const((int)FPCR.RMode1), Const(0)));
+
+ context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
+
+ // TODO: Restore FPSR
+ }
+ }
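
Concretely, the two encodings differ only in the middle values, which is why
the bits are read backwards: ARM FPCR.RMode 00/01/10/11 selects
nearest/plus infinity/minus infinity/zero, while x86 MXCSR.RC 00/01/10/11
selects nearest/minus infinity/plus infinity/zero.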
+
+ public static void ExitArmFpMode(EmitterContext context, Action<FPState, Operand> setFpFlag)
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
+
+ // Unset round mode (to nearest) and ftz.
+ mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
+
+ context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
+
+ // Status flags would be stored here if they were used.
+ }
+ else if (Optimizations.UseAdvSimd)
+ {
+ Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
+
+ // Unset round mode (to nearest) and fz.
+ fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
+
+ context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
+
+ // TODO: Store FPSR
+ }
+ }
+
+ public static int GetImmShl(OpCodeSimdShImm op)
+ {
+ return op.Imm - (8 << op.Size);
+ }
+
+ public static int GetImmShr(OpCodeSimdShImm op)
+ {
+ return (8 << (op.Size + 1)) - op.Imm;
+ }
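
These decode the AArch64 SIMD shift immediate field, where imm == esize + shift
for left shifts and imm == 2 * esize - shift for right shifts. A worked check
for byte elements (size == 0, esize == 8): SHL #3 encodes imm == 11, so
GetImmShl returns 11 - 8 == 3, and SSHR #3 encodes imm == 13, so GetImmShr
returns 16 - 13 == 3.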
+
+ public static Operand X86GetScalar(ArmEmitterContext context, float value)
+ {
+ return X86GetScalar(context, BitConverter.SingleToInt32Bits(value));
+ }
+
+ public static Operand X86GetScalar(ArmEmitterContext context, double value)
+ {
+ return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value));
+ }
+
+ public static Operand X86GetScalar(ArmEmitterContext context, int value)
+ {
+ return context.VectorCreateScalar(Const(value));
+ }
+
+ public static Operand X86GetScalar(ArmEmitterContext context, long value)
+ {
+ return context.VectorCreateScalar(Const(value));
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, float value)
+ {
+ return X86GetAllElements(context, BitConverter.SingleToInt32Bits(value));
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, double value)
+ {
+ return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value));
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, short value)
+ {
+ ulong value1 = (ushort)value;
+ ulong value2 = value1 << 16 | value1;
+ ulong value4 = value2 << 32 | value2;
+
+ return X86GetAllElements(context, (long)value4);
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, int value)
+ {
+ Operand vector = context.VectorCreateScalar(Const(value));
+
+ vector = context.AddIntrinsic(Intrinsic.X86Shufps, vector, vector, Const(0));
+
+ return vector;
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, long value)
+ {
+ Operand vector = context.VectorCreateScalar(Const(value));
+
+ vector = context.AddIntrinsic(Intrinsic.X86Movlhps, vector, vector);
+
+ return vector;
+ }
+
+ public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0)
+ {
+ return X86GetElements(context, (ulong)e1, (ulong)e0);
+ }
+
+ public static Operand X86GetElements(ArmEmitterContext context, ulong e1, ulong e0)
+ {
+ Operand vector0 = context.VectorCreateScalar(Const(e0));
+ Operand vector1 = context.VectorCreateScalar(Const(e1));
+
+ return context.AddIntrinsic(Intrinsic.X86Punpcklqdq, vector0, vector1);
+ }
+
+ public static int X86GetRoundControl(FPRoundingMode roundMode)
+ {
+ switch (roundMode)
+ {
+ case FPRoundingMode.ToNearest: return 8 | 0; // even
+ case FPRoundingMode.TowardsPlusInfinity: return 8 | 2;
+ case FPRoundingMode.TowardsMinusInfinity: return 8 | 1;
+ case FPRoundingMode.TowardsZero: return 8 | 3;
+ }
+
+ throw new ArgumentException($"Invalid rounding mode \"{roundMode}\".");
+ }
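
These immediates follow the ROUNDSS/ROUNDSD encoding: bits 1:0 select the
rounding mode, a clear bit 2 makes the immediate override MXCSR.RC, and bit 3
(the constant 8 above) suppresses the precision (inexact) exception. For
instance:

    int imm = X86GetRoundControl(FPRoundingMode.TowardsZero); // 8 | 3 == 0b1011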
+
+ public static Operand EmitSse41RoundToNearestWithTiesToAwayOpF(ArmEmitterContext context, Operand n, bool scalar)
+ {
+ Debug.Assert(n.Type == OperandType.V128);
+
+ Operand nCopy = context.Copy(n);
+
+ Operand rC = Const(X86GetRoundControl(FPRoundingMode.TowardsZero));
+
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
+ {
+ Operand signMask = scalar ? X86GetScalar(context, int.MinValue) : X86GetAllElements(context, int.MinValue);
+ signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);
+
+ // 0x3EFFFFFF == BitConverter.SingleToInt32Bits(0.5f) - 1
+ Operand valueMask = scalar ? X86GetScalar(context, 0x3EFFFFFF) : X86GetAllElements(context, 0x3EFFFFFF);
+ valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addss : Intrinsic.X86Addps, nCopy, valueMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundss : Intrinsic.X86Roundps, nCopy, rC);
+ }
+ else
+ {
+ Operand signMask = scalar ? X86GetScalar(context, long.MinValue) : X86GetAllElements(context, long.MinValue);
+ signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);
+
+ // 0x3FDFFFFFFFFFFFFFL == BitConverter.DoubleToInt64Bits(0.5d) - 1L
+ Operand valueMask = scalar ? X86GetScalar(context, 0x3FDFFFFFFFFFFFFFL) : X86GetAllElements(context, 0x3FDFFFFFFFFFFFFFL);
+ valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addsd : Intrinsic.X86Addpd, nCopy, valueMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundsd : Intrinsic.X86Roundpd, nCopy, rC);
+ }
+
+ return nCopy;
+ }
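
The trick above implements round half away from zero without a native x86
rounding mode: add the largest float strictly below 0.5, carrying the input's
sign, then truncate. For an exact .5 fraction the float addition itself rounds
the sum up to the next integer, while anything smaller in magnitude stays put.
A scalar model of the single-precision case (illustrative, not the emitter
code):

    static float RoundTiesToAwayModel(float x)
    {
        int sign = BitConverter.SingleToInt32Bits(x) & int.MinValue;      // Pand sign mask
        float adjust = BitConverter.Int32BitsToSingle(0x3EFFFFFF | sign); // Por value mask
        return MathF.Truncate(x + adjust);                                // Addss + Roundss (towards zero)
    }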
+
+ public static Operand EmitCountSetBits8(ArmEmitterContext context, Operand op) // "size" is 8 (SIMD&FP Inst.).
+ {
+ Debug.Assert(op.Type == OperandType.I32 || op.Type == OperandType.I64);
+
+ Operand op0 = context.Subtract(op, context.BitwiseAnd(context.ShiftRightUI(op, Const(1)), Const(op.Type, 0x55L)));
+
+ Operand c1 = Const(op.Type, 0x33L);
+ Operand op1 = context.Add(context.BitwiseAnd(context.ShiftRightUI(op0, Const(2)), c1), context.BitwiseAnd(op0, c1));
+
+ return context.BitwiseAnd(context.Add(op1, context.ShiftRightUI(op1, Const(4))), Const(op.Type, 0x0fL));
+ }
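
A worked pass for op == 0xB7 (0b1011_0111, six set bits): the first step gives
0xB7 - ((0xB7 >> 1) & 0x55) == 0x66, the per-pair counts; the second gives
((0x66 >> 2) & 0x33) + (0x66 & 0x33) == 0x11 + 0x22 == 0x33, the per-nibble
counts; and the last gives (0x33 + (0x33 >> 4)) & 0x0F == 6.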
+
+ public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ Operand res = context.AddIntrinsic(inst, n);
+
+ if ((op.Size & 1) != 0)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+ else
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ Operand res = context.AddIntrinsic(inst, n, m);
+
+ if ((op.Size & 1) != 0)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+ else
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ Operand res = context.AddIntrinsic(inst, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ Operand res = context.AddIntrinsic(inst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static Operand EmitUnaryMathCall(ArmEmitterContext context, string name, Operand n)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(MathF).GetMethod(name, new Type[] { typeof(float) })
+ : typeof(Math). GetMethod(name, new Type[] { typeof(double) });
+
+ return context.Call(info, n);
+ }
+
+ public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ string name = nameof(Math.Round);
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
+ : typeof(Math). GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });
+
+ return context.Call(info, n, Const((int)roundMode));
+ }
+
+ public static Operand EmitGetRoundingMode(ArmEmitterContext context)
+ {
+ Operand rMode = context.ShiftLeft(GetFpFlag(FPState.RMode1Flag), Const(1));
+ rMode = context.BitwiseOr(rMode, GetFpFlag(FPState.RMode0Flag));
+
+ return rMode;
+ }
+
+ public static Operand EmitRoundByRMode(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.FP32 || op.Type == OperandType.FP64);
+
+ Operand lbl1 = Label();
+ Operand lbl2 = Label();
+ Operand lbl3 = Label();
+ Operand lblEnd = Label();
+
+ Operand rN = Const((int)FPRoundingMode.ToNearest);
+ Operand rP = Const((int)FPRoundingMode.TowardsPlusInfinity);
+ Operand rM = Const((int)FPRoundingMode.TowardsMinusInfinity);
+
+ Operand res = context.AllocateLocal(op.Type);
+
+ Operand rMode = EmitGetRoundingMode(context);
+
+ context.BranchIf(lbl1, rMode, rN, Comparison.NotEqual);
+ context.Copy(res, EmitRoundMathCall(context, MidpointRounding.ToEven, op));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lbl2, rMode, rP, Comparison.NotEqual);
+ context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Ceiling), op));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl2);
+ context.BranchIf(lbl3, rMode, rM, Comparison.NotEqual);
+ context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Floor), op));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl3);
+ context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Truncate), op));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ public static Operand EmitSoftFloatCall(ArmEmitterContext context, string name, params Operand[] callArgs)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(SoftFloat32).GetMethod(name)
+ : typeof(SoftFloat64).GetMethod(name);
+
+ context.ExitArmFpMode();
+ context.StoreToContext();
+ Operand res = context.Call(info, callArgs);
+ context.LoadFromContext();
+ context.EnterArmFpMode();
+
+ return res;
+ }
+
+ public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
+ }
+
+ public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand d = context.VectorExtract(type, GetVec(op.Rd), 0);
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(d, n, m), 0));
+ }
+
+ public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
+ Operand m = EmitVectorExtractSx(context, op.Rm, 0, op.Size);
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+ Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = EmitVectorExtractZx(context, op.Rd, 0, op.Size);
+ Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+ Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
+
+ d = EmitVectorInsert(context, context.VectorZero(), emit(d, n, m), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0));
+ }
+
+ public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
+ }
+
+ public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand a = context.VectorExtract(type, GetVec(op.Ra), 0);
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0));
+ }
+
+ public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> sizeF + 2;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+
+ res = context.VectorInsert(res, emit(ne), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), index);
+
+ res = context.VectorInsert(res, emit(ne, me), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), index);
+
+ res = context.VectorInsert(res, emit(de, ne, me), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ res = context.VectorInsert(res, emit(ne, me), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ res = context.VectorInsert(res, emit(de, ne, me), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size);
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
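+
+ // Callers in the instruction emitters supply a lambda over IR operands;
+ // for example, a plain vector integer add could be emitted as (sketch):
+ // EmitVectorBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));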
+
+ public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
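+ // By-element variants: the Rm operand is a single element selected by
+ // op.Index and reused on every loop iteration, while Rn (and Rd for the
+ // ternary form) are traversed element by element.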
+ public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ Operand imm = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, emit(imm), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ Operand imm = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
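+ // Widening helpers: sources provide 8 >> op.Size narrow elements and the
+ // destination elements are one size larger (op.Size + 1). For the "2"
+ // (second-part) encodings, RegisterSize.Simd128 makes `part` select the
+ // upper half of the narrow source vector.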
+ public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenRmBinaryOp(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenRmBinaryOp(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed);
+ Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+ Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorWidenRnRmTernaryOp(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+ Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenBinaryOpByElem(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenBinaryOpByElem(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorWidenTernaryOpByElem(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorWidenTernaryOpByElem(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorPairwiseOp(context, emit, signed: true);
+ }
+
+ public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorPairwiseOp(context, emit, signed: false);
+ }
+
+ private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
+ Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);
+
+ Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex, op.Size, signed);
+ Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(n0, n1), index, op.Size);
+ res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
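+ // SSSE3 fast path for integer pairwise operations. ZeroMask, EvenMasks and
+ // OddMasks are assumed to be Pshufb control constants defined elsewhere in
+ // this class; they gather the even- and odd-indexed elements so a single
+ // vector instruction (inst[op.Size]) can combine every adjacent pair.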
+ public static void EmitSsse3VectorPairwiseOp(ArmEmitterContext context, Intrinsic[] inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]);
+ Operand zeroOddMask = X86GetElements(context, ZeroMask, OddMasks [op.Size]);
+
+ Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n
+
+ Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n
+ Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
+ }
+ else if (op.Size < 3)
+ {
+ Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]);
+
+ Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n
+ Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even from m
+
+ Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM);
+ Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
+ }
+ else
+ {
+ Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
+ Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[3], left, right));
+ }
+ }
+
+ public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false);
+ }
+
+ public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false);
+ }
+
+ public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true);
+ }
+
+ public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true);
+ }
+
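+ // Reduces every element of Rn into one value with the callback; isLong
+ // widens the destination element by one size (the SADDLV/UADDLV-style long
+ // reductions).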
+ private static void EmitVectorAcrossVectorOp(
+ ArmEmitterContext context,
+ Func2I emit,
+ bool signed,
+ bool isLong)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed);
+
+ for (int index = 1; index < elems; index++)
+ {
+ Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+
+ res = emit(res, n);
+ }
+
+ int size = isLong ? op.Size + 1 : op.Size;
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitVectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);
+
+ Operand res = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
+ for (int index = 1; index < 4; index++)
+ {
+ Operand n = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), index);
+
+ res = emit(res, n);
+ }
+
+ Operand d = context.VectorInsert(context.VectorZero(), res, 0);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
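+ // SSE2 fast path for the FP32 across-vector reduction: each smN immediate
+ // broadcasts lane N into all four lanes of a Shufps result, the reduction
+ // is computed as emit(emit(lane0, lane1), emit(lane2, lane3)), and the
+ // upper 96 bits of the destination are then cleared.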
+ public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);
+
+ const int sm0 = 0 << 6 | 0 << 4 | 0 << 2 | 0 << 0;
+ const int sm1 = 1 << 6 | 1 << 4 | 1 << 2 | 1 << 0;
+ const int sm2 = 2 << 6 | 2 << 4 | 2 << 2 | 2 << 0;
+ const int sm3 = 3 << 6 | 3 << 4 | 3 << 2 | 3 << 0;
+
+ Operand nCopy = context.Copy(GetVec(op.Rn));
+
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm0));
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm1));
+ Operand part2 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm2));
+ Operand part3 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm3));
+
+ Operand res = emit(emit(part0, part1), emit(part2, part3));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+
+ public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1);
+
+ Operand res = context.VectorInsert(context.VectorZero(), emit(ne0, ne1), 0);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand op0, op1;
+
+ if ((op.Size & 1) == 0)
+ {
+ const int sm0 = 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0;
+ const int sm1 = 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0;
+
+ Operand zeroN = context.VectorZeroUpper64(n);
+
+ op0 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm0));
+ op1 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm1));
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ Operand zero = context.VectorZero();
+
+ op0 = context.AddIntrinsic(Intrinsic.X86Movlhps, n, zero);
+ op1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, n);
+ }
+
+ context.Copy(GetVec(op.Rd), emit(op0, op1));
+ }
+
+ public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int pairs = op.GetPairsCount() >> (sizeF + 2);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand n0 = context.VectorExtract(type, GetVec(op.Rn), pairIndex);
+ Operand n1 = context.VectorExtract(type, GetVec(op.Rn), pairIndex + 1);
+
+ Operand m0 = context.VectorExtract(type, GetVec(op.Rm), pairIndex);
+ Operand m1 = context.VectorExtract(type, GetVec(op.Rm), pairIndex + 1);
+
+ res = context.VectorInsert(res, emit(n0, n1), index);
+ res = context.VectorInsert(res, emit(m0, m1), pairs + index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand nCopy = context.Copy(GetVec(op.Rn));
+ Operand mCopy = context.Copy(GetVec(op.Rm));
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, nCopy, mCopy);
+
+ Operand zero = context.VectorZero();
+
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero);
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck);
+
+ context.Copy(GetVec(op.Rd), emit(part0, part1));
+ }
+ else /* if (op.RegisterSize == RegisterSize.Simd128) */
+ {
+ const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
+ const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;
+
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm0));
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm1));
+
+ context.Copy(GetVec(op.Rd), emit(part0, part1));
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, nCopy, mCopy);
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nCopy, mCopy);
+
+ context.Copy(GetVec(op.Rd), emit(part0, part1));
+ }
+ }
+
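+ // Predicate immediates for the x86 CMPPS/CMPPD family; values above 7 are
+ // only encodable with the VEX (AVX) forms, hence the separate "Vex" group.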
+ public enum CmpCondition
+ {
+ // Legacy Sse.
+ Equal = 0, // Ordered, non-signaling.
+ LessThan = 1, // Ordered, signaling.
+ LessThanOrEqual = 2, // Ordered, signaling.
+ UnorderedQ = 3, // Non-signaling.
+ NotLessThan = 5, // Unordered, signaling.
+ NotLessThanOrEqual = 6, // Unordered, signaling.
+ OrderedQ = 7, // Non-signaling.
+
+ // Vex.
+ GreaterThanOrEqual = 13, // Ordered, signaling.
+ GreaterThan = 14, // Ordered, signaling.
+ OrderedS = 23 // Signaling.
+ }
+
+ [Flags]
+ public enum SaturatingFlags
+ {
+ None = 0,
+
+ ByElem = 1 << 0,
+ Scalar = 1 << 1,
+ Signed = 1 << 2,
+
+ Add = 1 << 3,
+ Sub = 1 << 4,
+
+ Accumulate = 1 << 5
+ }
+
+ public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed);
+ }
+
+ public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Signed);
+ }
+
+ public static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ bool scalar = (flags & SaturatingFlags.Scalar) != 0;
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand de;
+
+ if (op.Size <= 2)
+ {
+ de = EmitSignedSrcSatQ(context, emit(ne), op.Size, signedDst: true);
+ }
+ else /* if (op.Size == 3) */
+ {
+ de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne));
+ }
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
+ {
+ EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed | flags);
+ }
+
+ public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
+ {
+ EmitSaturatingBinaryOp(context, null, SaturatingFlags.Scalar | flags);
+ }
+
+ public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
+ {
+ EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Signed | flags);
+ }
+
+ public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
+ {
+ EmitSaturatingBinaryOp(context, null, flags);
+ }
+
+ public static void EmitVectorSaturatingBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitSaturatingBinaryOp(context, emit, SaturatingFlags.ByElem | SaturatingFlags.Signed);
+ }
+
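+ // Dispatches on the flags: Add/Sub range-check small elements after a plain
+ // Add/Subtract and use the dedicated 64-bit SatQ helpers otherwise;
+ // Accumulate extracts the Rn addend with the opposite signedness (the
+ // SUQADD/USQADD-style accumulating forms); the fallback saturates the
+ // callback result per element, optionally with a fixed by-element Rm.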
+ public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ bool byElem = (flags & SaturatingFlags.ByElem) != 0;
+ bool scalar = (flags & SaturatingFlags.Scalar) != 0;
+ bool signed = (flags & SaturatingFlags.Signed) != 0;
+
+ bool add = (flags & SaturatingFlags.Add) != 0;
+ bool sub = (flags & SaturatingFlags.Sub) != 0;
+
+ bool accumulate = (flags & SaturatingFlags.Accumulate) != 0;
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ if (add || sub)
+ {
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de;
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+ Operand me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed);
+
+ if (op.Size <= 2)
+ {
+ Operand temp = add ? context.Add(ne, me) : context.Subtract(ne, me);
+
+ de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed);
+ }
+ else /* if (op.Size == 3) */
+ {
+ if (add)
+ {
+ de = signed ? EmitBinarySignedSatQAdd(context, ne, me) : EmitBinaryUnsignedSatQAdd(context, ne, me);
+ }
+ else /* if (sub) */
+ {
+ de = signed ? EmitBinarySignedSatQSub(context, ne, me) : EmitBinaryUnsignedSatQSub(context, ne, me);
+ }
+ }
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+ else if (accumulate)
+ {
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de;
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed);
+ Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed);
+
+ if (op.Size <= 2)
+ {
+ Operand temp = context.Add(ne, me);
+
+ de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed);
+ }
+ else /* if (op.Size == 3) */
+ {
+ de = signed ? EmitBinarySignedSatQAcc(context, ne, me) : EmitBinaryUnsignedSatQAcc(context, ne, me);
+ }
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+ else
+ {
+ Operand me = default;
+
+ if (byElem)
+ {
+ OpCodeSimdRegElem opRegElem = (OpCodeSimdRegElem)op;
+
+ me = EmitVectorExtract(context, opRegElem.Rm, opRegElem.Index, op.Size, signed);
+ }
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+
+ if (!byElem)
+ {
+ me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed);
+ }
+
+ Operand de = EmitSignedSrcSatQ(context, emit(ne, me), op.Size, signedDst: signed);
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ [Flags]
+ public enum SaturatingNarrowFlags
+ {
+ Scalar = 1 << 0,
+ SignedSrc = 1 << 1,
+ SignedDst = 1 << 2,
+
+ ScalarSxSx = Scalar | SignedSrc | SignedDst,
+ ScalarSxZx = Scalar | SignedSrc,
+ ScalarZxZx = Scalar,
+
+ VectorSxSx = SignedSrc | SignedDst,
+ VectorSxZx = SignedSrc,
+ VectorZxZx = 0
+ }
+
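+ // Narrowing with saturation: the "2" (upper-half) forms are indicated by
+ // RegisterSize.Simd128, in which case res starts as a copy of Rd so the
+ // lower half is preserved and the narrowed elements land in the upper half.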
+ public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0;
+ bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0;
+ bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0;
+
+ int elems = !scalar ? 8 >> op.Size : 1;
+
+ int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);
+
+ Operand temp = signedSrc
+ ? EmitSignedSrcSatQ(context, ne, op.Size, signedDst)
+ : EmitUnsignedSrcSatQ(context, ne, op.Size, signedDst);
+
+ res = EmitVectorInsert(context, res, temp, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+
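+ // Saturation helpers: each clamps its I64 input to the destination element
+ // range and sets FPSR.QC (FPState.QcFlag) whenever saturation occurs.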
+ // long SignedSignSatQ(long op, int size);
+ public static Operand EmitSignedSignSatQ(ArmEmitterContext context, Operand op, int size)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lbl1 = Label();
+ Operand lblEnd = Label();
+
+ Operand zeroL = Const(0L);
+ Operand maxT = Const((1L << (eSize - 1)) - 1L);
+ Operand minT = Const(-(1L << (eSize - 1)));
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL);
+
+ context.BranchIf(lbl1, op, zeroL, Comparison.LessOrEqual);
+ context.Copy(res, maxT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, op, zeroL, Comparison.GreaterOrEqual);
+ context.Copy(res, minT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // ulong UnsignedSignSatQ(ulong op, int size);
+ public static Operand EmitUnsignedSignSatQ(ArmEmitterContext context, Operand op, int size)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lblEnd = Label();
+
+ Operand zeroUL = Const(0UL);
+ Operand maxT = Const(ulong.MaxValue >> (64 - eSize));
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL);
+
+ context.BranchIf(lblEnd, op, zeroUL, Comparison.LessOrEqualUI);
+ context.Copy(res, maxT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // TSrc (16bit, 32bit, 64bit; signed) > TDst (8bit, 16bit, 32bit; signed, unsigned).
+ // long SignedSrcSignedDstSatQ(long op, int size); ulong SignedSrcUnsignedDstSatQ(long op, int size);
+ public static Operand EmitSignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
+ {
+ int eSizeDst = 8 << sizeDst;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32);
+
+ Operand lbl1 = Label();
+ Operand lblEnd = Label();
+
+ Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL);
+ Operand minT = signedDst ? Const(-(1L << (eSizeDst - 1))) : Const(0UL);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lbl1, op, maxT, Comparison.LessOrEqual);
+ context.Copy(res, maxT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, op, minT, Comparison.GreaterOrEqual);
+ context.Copy(res, minT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // TSrc (16bit, 32bit, 64bit; unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
+ // long UnsignedSrcSignedDstSatQ(ulong op, int size); ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size);
+ public static Operand EmitUnsignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
+ {
+ int eSizeDst = 8 << sizeDst;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32);
+
+ Operand lblEnd = Label();
+
+ Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lblEnd, op, maxT, Comparison.LessOrEqualUI);
+ context.Copy(res, maxT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long UnarySignedSatQAbsOrNeg(long op);
+ private static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand minL = Const(long.MinValue);
+ Operand maxL = Const(long.MaxValue);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lblEnd, op, minL, Comparison.NotEqual);
+ context.Copy(res, maxL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
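+ // Signed saturating add: overflow occurred iff both operands share a sign
+ // and the sum's sign differs from op1, i.e. (~(op1 ^ op2) & (op1 ^ sum)) is
+ // negative; the result then clamps to long.MaxValue or long.MinValue
+ // according to the sign of op1.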
+ // long BinarySignedSatQAdd(long op1, long op2);
+ public static Operand EmitBinarySignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand minL = Const(long.MinValue);
+ Operand maxL = Const(long.MaxValue);
+ Operand zeroL = Const(0L);
+
+ Operand add = context.Add(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
+
+ Operand left = context.BitwiseNot(context.BitwiseExclusiveOr(op1, op2));
+ Operand right = context.BitwiseExclusiveOr(op1, add);
+ context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual);
+
+ Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL);
+ context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL));
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2);
+ public static Operand EmitBinaryUnsignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand maxUL = Const(ulong.MaxValue);
+
+ Operand add = context.Add(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
+
+ context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI);
+ context.Copy(res, maxUL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long BinarySignedSatQSub(long op1, long op2);
+ public static Operand EmitBinarySignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand minL = Const(long.MinValue);
+ Operand maxL = Const(long.MaxValue);
+ Operand zeroL = Const(0L);
+
+ Operand sub = context.Subtract(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub);
+
+ Operand left = context.BitwiseExclusiveOr(op1, op2);
+ Operand right = context.BitwiseExclusiveOr(op1, sub);
+ context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual);
+
+ Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL);
+ context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL));
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // ulong BinaryUnsignedSatQSub(ulong op1, ulong op2);
+ public static Operand EmitBinaryUnsignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand zeroL = Const(0L);
+
+ Operand sub = context.Subtract(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub);
+
+ context.BranchIf(lblEnd, op1, op2, Comparison.GreaterOrEqualUI);
+ context.Copy(res, zeroL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long BinarySignedSatQAcc(ulong op1, long op2);
+ private static Operand EmitBinarySignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lbl1 = Label();
+ Operand lbl2 = Label();
+ Operand lblEnd = Label();
+
+ Operand maxL = Const(long.MaxValue);
+ Operand zeroL = Const(0L);
+
+ Operand add = context.Add(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
+
+ context.BranchIf(lbl1, op1, maxL, Comparison.GreaterUI);
+ Operand notOp2AndRes = context.BitwiseAnd(context.BitwiseNot(op2), add);
+ context.BranchIf(lblEnd, notOp2AndRes, zeroL, Comparison.GreaterOrEqual);
+ context.Copy(res, maxL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lbl2, op2, zeroL, Comparison.Less);
+ context.Copy(res, maxL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl2);
+ context.BranchIf(lblEnd, add, maxL, Comparison.LessOrEqualUI);
+ context.Copy(res, maxL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // ulong BinaryUnsignedSatQAcc(long op1, ulong op2);
+ private static Operand EmitBinaryUnsignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lbl1 = Label();
+ Operand lblEnd = Label();
+
+ Operand maxUL = Const(ulong.MaxValue);
+ Operand maxL = Const(long.MaxValue);
+ Operand zeroL = Const(0L);
+
+ Operand add = context.Add(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
+
+ context.BranchIf(lbl1, op1, zeroL, Comparison.Less);
+ context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI);
+ context.Copy(res, maxUL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, op2, maxL, Comparison.GreaterUI);
+ context.BranchIf(lblEnd, add, zeroL, Comparison.GreaterOrEqual);
+ context.Copy(res, zeroL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
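+ // Computes |value| by and-not-ing with a sign-bit mask: broadcasting -0.0f
+ // or -0.0 sets only the sign bit, and Andnps/Andnpd compute ~mask & value,
+ // clearing that bit.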
+ public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector)
+ {
+ Operand mask;
+ if (single)
+ {
+ mask = vector ? X86GetAllElements(context, -0f) : X86GetScalar(context, -0f);
+ }
+ else
+ {
+ mask = vector ? X86GetAllElements(context, -0d) : X86GetScalar(context, -0d);
+ }
+
+ return context.AddIntrinsic(single ? Intrinsic.X86Andnps : Intrinsic.X86Andnpd, mask, value);
+ }
+
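+ // Element access helpers: values are always extracted into an I64 operand,
+ // sign- or zero-extended for element sizes below 3, and EmitVectorInsert
+ // narrows back to I32 before inserting elements smaller than 64 bits.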
+ public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return EmitVectorExtract(context, reg, index, size, true);
+ }
+
+ public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return EmitVectorExtract(context, reg, index, size, false);
+ }
+
+ public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
+ {
+ ThrowIfInvalid(index, size);
+
+ Operand res = default;
+
+ switch (size)
+ {
+ case 0:
+ res = context.VectorExtract8(GetVec(reg), index);
+ break;
+
+ case 1:
+ res = context.VectorExtract16(GetVec(reg), index);
+ break;
+
+ case 2:
+ res = context.VectorExtract(OperandType.I32, GetVec(reg), index);
+ break;
+
+ case 3:
+ res = context.VectorExtract(OperandType.I64, GetVec(reg), index);
+ break;
+ }
+
+ if (signed)
+ {
+ switch (size)
+ {
+ case 0: res = context.SignExtend8 (OperandType.I64, res); break;
+ case 1: res = context.SignExtend16(OperandType.I64, res); break;
+ case 2: res = context.SignExtend32(OperandType.I64, res); break;
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 0: res = context.ZeroExtend8 (OperandType.I64, res); break;
+ case 1: res = context.ZeroExtend16(OperandType.I64, res); break;
+ case 2: res = context.ZeroExtend32(OperandType.I64, res); break;
+ }
+ }
+
+ return res;
+ }
+
+ public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
+ {
+ ThrowIfInvalid(index, size);
+
+ if (size < 3 && value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ switch (size)
+ {
+ case 0: vector = context.VectorInsert8 (vector, value, index); break;
+ case 1: vector = context.VectorInsert16(vector, value, index); break;
+ case 2: vector = context.VectorInsert (vector, value, index); break;
+ case 3: vector = context.VectorInsert (vector, value, index); break;
+ }
+
+ return vector;
+ }
+
+ public static void ThrowIfInvalid(int index, int size)
+ {
+ if ((uint)size > 3u)
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ if ((uint)index >= 16u >> size)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
new file mode 100644
index 00000000..36d27d42
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -0,0 +1,1286 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+ using Func2I = Func<Operand, Operand, Operand>;
+ using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+ static class InstEmitSimdHelper32
+ {
+ public static (int, int) GetQuadwordAndSubindex(int index, RegisterSize size)
+ {
+ switch (size)
+ {
+ case RegisterSize.Simd128:
+ return (index >> 1, 0);
+ case RegisterSize.Simd64:
+ case RegisterSize.Int64:
+ return (index >> 1, index & 1);
+ case RegisterSize.Int32:
+ return (index >> 2, index & 3);
+ }
+
+ throw new ArgumentException("Unrecognized Vector Register Size.");
+ }
+
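+ // AArch32 SIMD registers alias the Q registers: each D register is one
+ // 64-bit half (reg >> 1, element reg & 1) and each S register one 32-bit
+ // quarter (reg >> 2, element reg & 3), which is how the scalar accessors
+ // below split the register index.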
+ public static Operand ExtractScalar(ArmEmitterContext context, OperandType type, int reg)
+ {
+ Debug.Assert(type != OperandType.V128);
+
+ if (type == OperandType.FP64 || type == OperandType.I64)
+ {
+ // From dreg.
+ return context.VectorExtract(type, GetVecA32(reg >> 1), reg & 1);
+ }
+ else
+ {
+ // From sreg.
+ return context.VectorExtract(type, GetVecA32(reg >> 2), reg & 3);
+ }
+ }
+
+ public static void InsertScalar(ArmEmitterContext context, int reg, Operand value)
+ {
+ Debug.Assert(value.Type != OperandType.V128);
+
+ Operand vec, insert;
+ if (value.Type == OperandType.FP64 || value.Type == OperandType.I64)
+ {
+ // From dreg.
+ vec = GetVecA32(reg >> 1);
+ insert = context.VectorInsert(vec, value, reg & 1);
+ }
+ else
+ {
+ // From sreg.
+ vec = GetVecA32(reg >> 2);
+ insert = context.VectorInsert(vec, value, reg & 3);
+ }
+
+ context.Copy(vec, insert);
+ }
+
+ public static Operand ExtractScalar16(ArmEmitterContext context, int reg, bool top)
+ {
+ return context.VectorExtract16(GetVecA32(reg >> 2), ((reg & 3) << 1) | (top ? 1 : 0));
+ }
+
+ public static void InsertScalar16(ArmEmitterContext context, int reg, bool top, Operand value)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.I32);
+
+ Operand vec, insert;
+ vec = GetVecA32(reg >> 2);
+ insert = context.VectorInsert16(vec, value, ((reg & 3) << 1) | (top ? 1 : 0));
+
+ context.Copy(vec, insert);
+ }
+
+ public static Operand ExtractElement(ArmEmitterContext context, int reg, int size, bool signed)
+ {
+ return EmitVectorExtract32(context, reg >> (4 - size), reg & ((16 >> size) - 1), size, signed);
+ }
+
+ public static void EmitVectorImmUnaryOp32(ArmEmitterContext context, Func1I emit)
+ {
+ IOpCode32SimdImm op = (IOpCode32SimdImm)context.CurrOp;
+
+ Operand imm = Const(op.Immediate);
+
+ int elems = op.Elems;
+ (int index, int subIndex) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
+
+ Operand vec = GetVecA32(index);
+ Operand res = vec;
+
+ for (int item = 0; item < elems; item++)
+ {
+ res = EmitVectorInsert(context, res, emit(imm), item + subIndex * elems, op.Size);
+ }
+
+ context.Copy(vec, res);
+ }
+
+ public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Func1I emit)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ InsertScalar(context, op.Vd, emit(m));
+ }
+
+ public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand n = ExtractScalar(context, type, op.Vn);
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ InsertScalar(context, op.Vd, emit(n, m));
+ }
+
+ public static void EmitScalarBinaryOpI32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.I64 : OperandType.I32;
+
+ if (op.Size < 2)
+ {
+ throw new NotSupportedException("Cannot perform a scalar SIMD operation on integers smaller than 32 bits.");
+ }
+
+ Operand n = ExtractScalar(context, type, op.Vn);
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ InsertScalar(context, op.Vd, emit(n, m));
+ }
+
+ public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Func3I emit)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand a = ExtractScalar(context, type, op.Vd);
+ Operand n = ExtractScalar(context, type, op.Vn);
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ InsertScalar(context, op.Vd, emit(a, n, m));
+ }
+
+ public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Func1I emit)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
+
+ res = context.VectorInsert(res, emit(me), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
+ Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
+
+ res = context.VectorInsert(res, emit(ne, me), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Func3I emit)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index);
+ Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
+ Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
+
+ res = context.VectorInsert(res, emit(de, ne, me), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ // Integer
+
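+ // The Qd/Qn/Qm and Id/In/Im pairs are assumed to come from the AArch32
+ // decoder: Qx is the backing Q register index and Ix the base element
+ // offset within it, so D-register operands address the correct half of
+ // the 128-bit vector.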
+ public static void EmitVectorUnaryAccumulateOpI32(ArmEmitterContext context, Func1I emit, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, context.Add(de, emit(me)), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorUnaryOpI32(ArmEmitterContext context, Func1I emit, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(me), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorBinaryOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorBinaryLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorBinaryWideOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size + 1, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorImmBinaryQdQmOpZx32(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorImmBinaryQdQmOpI32(context, emit, false);
+ }
+
+ public static void EmitVectorImmBinaryQdQmOpSx32(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorImmBinaryQdQmOpI32(context, emit, true);
+ }
+
+ public static void EmitVectorImmBinaryQdQmOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, me), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size + 1, signed);
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed);
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit)
+ {
+ EmitVectorUnaryOpI32(context, emit, true);
+ }
+
+ public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit, bool accumulate)
+ {
+ if (accumulate)
+ {
+ EmitVectorUnaryAccumulateOpI32(context, emit, true);
+ }
+ else
+ {
+ EmitVectorUnaryOpI32(context, emit, true);
+ }
+ }
+
+ public static void EmitVectorBinaryOpSx32(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorBinaryOpI32(context, emit, true);
+ }
+
+ public static void EmitVectorTernaryOpSx32(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorTernaryOpI32(context, emit, true);
+ }
+
+ public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit)
+ {
+ EmitVectorUnaryOpI32(context, emit, false);
+ }
+
+ public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit, bool accumulate)
+ {
+ if (accumulate)
+ {
+ EmitVectorUnaryAccumulateOpI32(context, emit, false);
+ }
+ else
+ {
+ EmitVectorUnaryOpI32(context, emit, false);
+ }
+ }
+
+ public static void EmitVectorBinaryOpZx32(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorBinaryOpI32(context, emit, false);
+ }
+
+ public static void EmitVectorTernaryOpZx32(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorTernaryOpI32(context, emit, false);
+ }
+
+ // Vector by scalar
+
+ public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
+
+ res = context.VectorInsert(res, emit(ne, m), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand m = ExtractElement(context, op.Vm, op.Size, signed);
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, m), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorByScalarLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand m = ExtractElement(context, op.Vm, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ m = signed ? context.SignExtend32(OperandType.I64, m) : context.ZeroExtend32(OperandType.I64, m);
+ }
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ }
+
+ res = EmitVectorInsert(context, res, emit(ne, m), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Func3I emit)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index);
+ Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
+
+ res = context.VectorInsert(res, emit(de, ne, m), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorsByScalarOpI32(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
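+ // The low (4 - Size) bits of Vm are the element index within a Q register; the remaining bits select the register.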
+ Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed);
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed);
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, m), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ // Pairwise
+
+ public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+ int pairs = elems >> 1;
+
+ Operand res = GetVecA32(op.Qd);
+ Operand mvec = GetVecA32(op.Qm);
+ Operand nvec = GetVecA32(op.Qn);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand n1 = context.VectorExtract(type, nvec, op.Fn + pairIndex);
+ Operand n2 = context.VectorExtract(type, nvec, op.Fn + pairIndex + 1);
+
+ res = context.VectorInsert(res, emit(n1, n2), op.Fd + index);
+
+ Operand m1 = context.VectorExtract(type, mvec, op.Fm + pairIndex);
+ Operand m2 = context.VectorExtract(type, mvec, op.Fm + pairIndex + 1);
+
+ res = context.VectorInsert(res, emit(m1, m2), op.Fd + index + pairs);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorPairwiseOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+ Operand n1 = EmitVectorExtract32(context, op.Qn, op.In + pairIndex, op.Size, signed);
+ Operand n2 = EmitVectorExtract32(context, op.Qn, op.In + pairIndex + 1, op.Size, signed);
+
+ Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed);
+ Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(n1, n2), op.Id + index, op.Size);
+ res = EmitVectorInsert(context, res, emit(m1, m2), op.Id + index + pairs, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorPairwiseLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int elems = (op.Q ? 16 : 8) >> op.Size;
+ int pairs = elems >> 1;
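+ // Base result index within Qd: an odd Vd targets the high doubleword of the Q register.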
+ int id = (op.Vd & 1) * pairs;
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+ Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed);
+ Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ m1 = signed ? context.SignExtend32(OperandType.I64, m1) : context.ZeroExtend32(OperandType.I64, m1);
+ m2 = signed ? context.SignExtend32(OperandType.I64, m2) : context.ZeroExtend32(OperandType.I64, m2);
+ }
+
+ res = EmitVectorInsert(context, res, emit(m1, m2), id + index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ // Narrow
+
+ public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit, bool signed = false)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int elems = 8 >> op.Size; // Size is the target (narrowed) element size; the source elements are one size larger.
+
+ Operand res = GetVecA32(op.Qd);
+ int id = (op.Vd & 1) << (3 - op.Size); // Target doubleword base.
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, signed);
+
+ res = EmitVectorInsert(context, res, emit(m), id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ // Intrinsic Helpers
+
+ public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
+ {
+ Debug.Assert(input.Type == OperandType.V128);
+
+ int originalSide = originalV & 1;
+ int targetSide = targetV & 1;
+
+ if (originalSide == targetSide)
+ {
+ return input;
+ }
+
+ if (targetSide == 1)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Movlhps, input, input); // Low to high.
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Movhlps, input, input); // High to low.
+ }
+ }
+
+ public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
+ {
+ Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+ int targetSide = targetV & 1;
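+ // With mask 2, Shufpd takes the low doubleword of its first operand and the high doubleword of its second.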
+ int shuffleMask = 2;
+
+ if (targetSide == 1)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Shufpd, target, value, Const(shuffleMask));
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(shuffleMask));
+ }
+ }
+
+ public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
+ {
+ Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+ // Insert from index 0 in value to index in target.
+ int index = reg & (doubleWidth ? 1 : 3);
+
+ if (doubleWidth)
+ {
+ if (index == 1)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Movlhps, target, value); // Low to high.
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(2)); // Low to low, keep high from original.
+ }
+ }
+ else
+ {
+ if (Optimizations.UseSse41)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Insertps, target, value, Const(index << 4));
+ }
+ else
+ {
+ target = EmitSwapScalar(context, target, index, doubleWidth); // Swap value to replace into element 0.
+ target = context.AddIntrinsic(Intrinsic.X86Movss, target, value); // Move the value into element 0 of the vector.
+ return EmitSwapScalar(context, target, index, doubleWidth); // Swap new value back to the correct index.
+ }
+ }
+ }
+
+ public static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
+ {
+ // Swap the element at the given index with element 0. This swap happens at the start of an A32 scalar op if required.
+ int index = reg & (doubleWidth ? 1 : 3);
+
+ if (index == 0)
+ {
+ return target;
+ }
+
+ if (doubleWidth)
+ {
+ int shuffleMask = 1; // Swap top and bottom. (b0 = 1, b1 = 0)
+ return context.AddIntrinsic(Intrinsic.X86Shufpd, target, target, Const(shuffleMask));
+ }
+ else
+ {
+ int shuffleMask = (3 << 6) | (2 << 4) | (1 << 2) | index; // Swap index and 0. (others remain)
+ shuffleMask &= ~(3 << (index * 2));
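+ // e.g. index == 2: mask = 0b11_00_01_10, so elements 0 and 2 trade places while 1 and 3 stay put.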
+
+ return context.AddIntrinsic(Intrinsic.X86Shufps, target, target, Const(shuffleMask));
+ }
+ }
+
+ // Vector Operand Templates
+
+ public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+ }
+
+ Operand res = vectorFunc(m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
+ }
+
+ public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+
+ if (side == -1)
+ {
+ side = op.Vd;
+ }
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, side);
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, side);
+ }
+
+ Operand res = vectorFunc(n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ if (side != op.Vd)
+ {
+ res = EmitMoveDoubleWordToSide(context, res, side, op.Vd);
+ }
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+ Operand initialD = d;
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+ }
+
+ Operand res = vectorFunc(d, n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
+ }
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
+ Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;
+
+ EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(inst1, n, m);
+ return context.AddIntrinsic(inst2, d, res);
+ });
+ }
+
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Debug.Assert((op.Size & 1) == 0);
+
+ EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+ {
+ return context.AddIntrinsic(inst32, d, n, m);
+ });
+ }
+
+ public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+
+ m = EmitSwapScalar(context, m, op.Vm, doubleSize);
+
+ Operand res = scalarFunc(m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m));
+ }
+
+ public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vn >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+
+ n = EmitSwapScalar(context, n, op.Vn, doubleSize);
+ m = EmitSwapScalar(context, m, op.Vm, doubleSize);
+
+ Operand res = scalarFunc(n, m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vn >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+ Operand initialD = d;
+
+ n = EmitSwapScalar(context, n, op.Vn, doubleSize);
+ m = EmitSwapScalar(context, m, op.Vm, doubleSize);
+ d = EmitSwapScalar(context, d, op.Vd, doubleSize);
+
+ Operand res = scalarFunc(d, n, m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize);
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+
+ Intrinsic inst = doubleSize ? inst64 : inst32;
+
+ EmitScalarTernaryOpSimd32(context, (d, n, m) =>
+ {
+ return context.AddIntrinsic(inst, d, n, m);
+ });
+ }
+
+ public static void EmitScalarTernaryOpF32(
+ ArmEmitterContext context,
+ Intrinsic inst32pt1,
+ Intrinsic inst64pt1,
+ Intrinsic inst32pt2,
+ Intrinsic inst64pt2,
+ bool isNegD = false)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+
+ Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
+ Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;
+
+ EmitScalarTernaryOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(inst1, n, m);
+
+ if (isNegD)
+ {
+ Operand mask = doubleSize
+ ? X86GetScalar(context, -0d)
+ : X86GetScalar(context, -0f);
+
+ d = doubleSize
+ ? context.AddIntrinsic(Intrinsic.X86Xorpd, mask, d)
+ : context.AddIntrinsic(Intrinsic.X86Xorps, mask, d);
+ }
+
+ return context.AddIntrinsic(inst2, d, res);
+ });
+ }
+
+ // By Scalar
+
+ public static void EmitVectorByScalarOpSimd32(ArmEmitterContext context, Func2I vectorFunc)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand d = GetVecA32(op.Qd);
+
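+ // Broadcast the selected scalar element of Vm to all four lanes with a self-shuffle.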
+ int index = op.Vm & 3;
+ int dupeMask = (index << 6) | (index << 4) | (index << 2) | index;
+ Operand m = GetVecA32(op.Vm >> 2);
+ m = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(dupeMask));
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+ }
+
+ Operand res = vectorFunc(n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+ EmitVectorByScalarOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorsByScalarOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand d = GetVecA32(op.Qd);
+ Operand initialD = d;
+
+ int index = op.Vm & 3;
+ int dupeMask = (index << 6) | (index << 4) | (index << 2) | index;
+ Operand m = GetVecA32(op.Vm >> 2);
+ m = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(dupeMask));
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+ }
+
+ Operand res = vectorFunc(d, n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
+ }
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
+ Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;
+
+ EmitVectorsByScalarOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(inst1, n, m);
+ return context.AddIntrinsic(inst2, d, res);
+ });
+ }
+
+ // Pairwise
+
+ public static void EmitSse2VectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m);
+
+ Operand part0 = unpck;
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, unpck, unpck);
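+ // part0 = [n0, m0, n1, m1], part1 = [n1, m1, ...]; the op then combines each pair: [op(n0,n1), op(m0,m1)].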
+
+ return context.AddIntrinsic(inst32, part0, part1);
+ }, 0);
+ }
+
+ public static void EmitSsse3VectorPairwiseOp32(ArmEmitterContext context, Intrinsic[] inst)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]);
+ Operand zeroOddMask = X86GetElements(context, ZeroMask, OddMasks[op.Size]);
+
+ Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n
+
+ Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n
+ Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n
+
+ return context.AddIntrinsic(inst[op.Size], left, right);
+ }
+ else if (op.Size < 3)
+ {
+ Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]);
+
+ Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n
+ Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even from m
+
+ Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM);
+ Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM);
+
+ return context.AddIntrinsic(inst[op.Size], left, right);
+ }
+ else
+ {
+ Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
+ Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);
+
+ return context.AddIntrinsic(inst[3], left, right);
+ }
+ }, 0);
+ }
+
+ // Generic Functions
+
+ public static Operand EmitSoftFloatCallDefaultFpscr(ArmEmitterContext context, string name, params Operand[] callArgs)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(SoftFloat32).GetMethod(name)
+ : typeof(SoftFloat64).GetMethod(name);
+
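+ // The extra trailing argument (1) asks the softfloat routine for standard FPSCR behavior.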
+ Array.Resize(ref callArgs, callArgs.Length + 1);
+ callArgs[callArgs.Length - 1] = Const(1);
+
+ context.ExitArmFpMode();
+ context.StoreToContext();
+ Operand res = context.Call(info, callArgs);
+ context.LoadFromContext();
+ context.EnterArmFpMode();
+
+ return res;
+ }
+
+ public static Operand EmitVectorExtractSx32(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return EmitVectorExtract32(context, reg, index, size, true);
+ }
+
+ public static Operand EmitVectorExtractZx32(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return EmitVectorExtract32(context, reg, index, size, false);
+ }
+
+ public static Operand EmitVectorExtract32(ArmEmitterContext context, int reg, int index, int size, bool signed)
+ {
+ ThrowIfInvalid(index, size);
+
+ Operand res = default;
+
+ switch (size)
+ {
+ case 0:
+ res = context.VectorExtract8(GetVec(reg), index);
+ break;
+
+ case 1:
+ res = context.VectorExtract16(GetVec(reg), index);
+ break;
+
+ case 2:
+ res = context.VectorExtract(OperandType.I32, GetVec(reg), index);
+ break;
+
+ case 3:
+ res = context.VectorExtract(OperandType.I64, GetVec(reg), index);
+ break;
+ }
+
+ if (signed)
+ {
+ switch (size)
+ {
+ case 0: res = context.SignExtend8(OperandType.I32, res); break;
+ case 1: res = context.SignExtend16(OperandType.I32, res); break;
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 0: res = context.ZeroExtend8(OperandType.I32, res); break;
+ case 1: res = context.ZeroExtend16(OperandType.I32, res); break;
+ }
+ }
+
+ return res;
+ }
+
+ public static Operand EmitPolynomialMultiply(ArmEmitterContext context, Operand op1, Operand op2, int eSize)
+ {
+ Debug.Assert(eSize <= 32);
+
+ Operand result = eSize == 32 ? Const(0L) : Const(0);
+
+ if (eSize == 32)
+ {
+ op1 = context.ZeroExtend32(OperandType.I64, op1);
+ op2 = context.ZeroExtend32(OperandType.I64, op2);
+ }
+
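+ // op2 * mask is either 0 or op2 << i, so XOR-accumulating the partial products yields the carry-less (polynomial) product.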
+ for (int i = 0; i < eSize; i++)
+ {
+ Operand mask = context.BitwiseAnd(op1, Const(op1.Type, 1L << i));
+
+ result = context.BitwiseExclusiveOr(result, context.Multiply(op2, mask));
+ }
+
+ return result;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs
new file mode 100644
index 00000000..98236be6
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs
@@ -0,0 +1,366 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+ using Func2I = Func<Operand, Operand, Operand>;
+ using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+ static class InstEmitSimdHelper32Arm64
+ {
+ // Intrinsic Helpers
+
+ public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
+ {
+ Debug.Assert(input.Type == OperandType.V128);
+
+ int originalSide = originalV & 1;
+ int targetSide = targetV & 1;
+
+ if (originalSide == targetSide)
+ {
+ return input;
+ }
+
+ Intrinsic vType = Intrinsic.Arm64VDWord | Intrinsic.Arm64V128;
+
+ if (targetSide == 1)
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 0)); // Low to high.
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 1)); // High to low.
+ }
+ }
+
+ public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
+ {
+ Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+ int targetSide = targetV & 1;
+ Operand idx = Const(targetSide);
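+ // INS Vd.D[targetSide], Vn.D[targetSide]: copy the matching doubleword of value into target.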
+
+ return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, idx, value, idx);
+ }
+
+ public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
+ {
+ Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+ // Insert from index 0 in value to index in target.
+ int index = reg & (doubleWidth ? 1 : 3);
+
+ if (doubleWidth)
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, Const(index), value, Const(0));
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VWord, target, Const(index), value, Const(0));
+ }
+ }
+
+ public static Operand EmitExtractScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
+ {
+ int index = reg & (doubleWidth ? 1 : 3);
+ if (index == 0)
+ {
+ // Element is already at index 0, so just return the vector directly.
+ return target;
+ }
+
+ if (doubleWidth)
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VDWord, target, Const(1)); // Extract high (index 1).
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VWord, target, Const(index)); // Extract element at index.
+ }
+ }
+
+ // Vector Operand Templates
+
+ public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+ }
+
+ Operand res = vectorFunc(m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
+ }
+
+ public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+
+ if (side == -1)
+ {
+ side = op.Vd;
+ }
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, side);
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, side);
+ }
+
+ Operand res = vectorFunc(n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ if (side != op.Vd)
+ {
+ res = EmitMoveDoubleWordToSide(context, res, side, op.Vd);
+ }
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+ Operand initialD = d;
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+ }
+
+ Operand res = vectorFunc(d, n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
+ }
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
+ }
+
+ public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+
+ m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+ Operand res = scalarFunc(m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m));
+ }
+
+ public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vn >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+
+ n = EmitExtractScalar(context, n, op.Vn, doubleSize);
+ m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+ Operand res = scalarFunc(n, m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vn >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+ Operand initialD = d;
+
+ n = EmitExtractScalar(context, n, op.Vn, doubleSize);
+ m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+ d = EmitExtractScalar(context, d, op.Vd, doubleSize);
+
+ Operand res = scalarFunc(d, n, m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize);
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitScalarTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
+ }
+
+ // Pairwise
+
+ public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ inst32 |= Intrinsic.Arm64V64 | Intrinsic.Arm64VFloat;
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst32, n, m), 0);
+ }
+
+ public static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ bool cmpWithZero = (op.Opc & 2) != 0;
+
+ Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vd >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+
+ n = EmitExtractScalar(context, n, op.Vd, doubleSize);
+ m = cmpWithZero ? Const(0) : EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+ Operand nzcv = context.AddIntrinsicInt(inst, n, m);
+
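+ // The compare intrinsic returns NZCV in bits 31:28; unpack each bit into the FPSCR flags.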
+ Operand one = Const(1);
+
+ SetFpFlag(context, FPState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
+ SetFpFlag(context, FPState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
+ SetFpFlag(context, FPState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
+ SetFpFlag(context, FPState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
+ }
+
+ public static void EmitCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ Intrinsic inst;
+ if (zero)
+ {
+ inst = cond switch
+ {
+ CmpCondition.Equal => Intrinsic.Arm64FcmeqVz,
+ CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtVz,
+ CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeVz,
+ CmpCondition.LessThan => Intrinsic.Arm64FcmltVz,
+ CmpCondition.LessThanOrEqual => Intrinsic.Arm64FcmleVz,
+ _ => throw new InvalidOperationException()
+ };
+ }
+ else
+ {
+ inst = cond switch
+ {
+ CmpCondition.Equal => Intrinsic.Arm64FcmeqV,
+ CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtV,
+ CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeV,
+ _ => throw new InvalidOperationException()
+ };
+ }
+
+ inst |= (sizeF != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+
+ if (zero)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(inst, m);
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ return context.AddIntrinsic(inst, n, m);
+ });
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs
new file mode 100644
index 00000000..f0d242ae
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs
@@ -0,0 +1,720 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitSimdHelperArm64
+ {
+ public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitScalarUnaryOpFFromGp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitScalarUnaryOpFToGp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (inst, n)
+ : context.AddIntrinsicLong(inst, n));
+ }
+
+ public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitScalarBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+ }
+
+ public static void EmitScalarTernaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+ Operand a = GetVec(op.Ra);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, a, n, m));
+ }
+
+ public static void EmitScalarTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+ }
+
+ public static void EmitScalarUnaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
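+ // Integer ops encode the element size into the intrinsic id at Arm64VSizeShift.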
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitScalarBinaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitScalarBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+ }
+
+ public static void EmitScalarTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+ }
+
+ public static void EmitScalarShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
+ }
+
+ public static void EmitScalarShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+ }
+
+ public static void EmitScalarSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+
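+ // Saturating instructions can set FPSR.QC; mark the flag for synchronization.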
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitScalarSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ Operand result = context.AddIntrinsic(inst, n);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitScalarSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ Operand result = context.AddIntrinsic(inst, n, m);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitScalarSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ Operand result = context.AddIntrinsic(inst, d, n);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitScalarConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+ }
+
+ public static void EmitScalarConvertBinaryOpFFromGp(ArmEmitterContext context, Intrinsic inst, int fBits)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+ }
+
+ public static void EmitScalarConvertBinaryOpFToGp(ArmEmitterContext context, Intrinsic inst, int fBits)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt (inst, n, Const(fBits))
+ : context.AddIntrinsicLong(inst, n, Const(fBits)));
+ }
+
+ public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorBinaryOpFRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+ }
+
+ public static void EmitVectorBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+ }
+
+ public static void EmitVectorTernaryOpFRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+ }
+
+ public static void EmitVectorTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+ }
+
+ public static void EmitVectorUnaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitVectorBinaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+ }
+
+ public static void EmitVectorBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+ }
+
+ public static void EmitVectorTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+ }
+
+ public static void EmitVectorTernaryOpRdByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+ }
+
+ public static void EmitVectorShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
+ }
+
+ public static void EmitVectorShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+ }
+
+ public static void EmitVectorSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ Operand result = context.AddIntrinsic(inst, n);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ Operand result = context.AddIntrinsic(inst, n, m);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ Operand result = context.AddIntrinsic(inst, d, n);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorSaturatingBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ Operand result = context.AddIntrinsic(inst, n, m, Const(op.Index));
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+ }
+
+ public static void EmitVectorLookupTable(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
+
+ Operand[] operands = new Operand[op.Size + 1];
+
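+ // The index vector Rm goes last; the table registers Rn..Rn+Size-1 wrap around at register 31.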
+ operands[op.Size] = GetVec(op.Rm);
+
+ for (int index = 0; index < op.Size; index++)
+ {
+ operands[index] = GetVec((op.Rn + index) & 0x1F);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, operands));
+ }
+
+ public static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ bool cmpWithZero = !(op is OpCodeSimdFcond) && op.Bit3;
+
+ Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ Operand n = GetVec(op.Rn);
+ Operand m = cmpWithZero ? Const(0) : GetVec(op.Rm);
+
+ Operand nzcv = context.AddIntrinsicInt(inst, n, m);
+
+ Operand one = Const(1);
+
+ SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
+ SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
+ SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
+ SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/src/ARMeilleure/Instructions/InstEmitSimdLogical.cs
new file mode 100644
index 00000000..2bf531e6
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdLogical.cs
@@ -0,0 +1,612 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void And_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AndV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, op2));
+ }
+ }
+
+ public static void Bic_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64BicV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.BitwiseAnd(op1, context.BitwiseNot(op2));
+ });
+ }
+ }
+
+ public static void Bic_Vi(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+ Operand imm = eSize switch
+ {
+ 16 => X86GetAllElements(context, (short)~op.Immediate),
+ 32 => X86GetAllElements(context, (int)~op.Immediate),
+ _ => throw new InvalidOperationException($"Invalid element size {eSize}.")
+ };
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, d, imm);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorImmBinaryOp(context, (op1, op2) =>
+ {
+ return context.BitwiseAnd(op1, context.BitwiseNot(op2));
+ });
+ }
+ }
+
+ public static void Bif_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BifV);
+ }
+ else
+ {
+ EmitBifBit(context, notRm: true);
+ }
+ }
+
+ public static void Bit_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BitV);
+ }
+ else
+ {
+ EmitBifBit(context, notRm: false);
+ }
+ }
+
+ private static void EmitBifBit(ArmEmitterContext context, bool notRm)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
+
+ if (notRm)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, m, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pand, m, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand d = EmitVectorExtractZx(context, op.Rd, index, 3);
+ Operand n = EmitVectorExtractZx(context, op.Rn, index, 3);
+ Operand m = EmitVectorExtractZx(context, op.Rm, index, 3);
+
+ if (notRm)
+ {
+ m = context.BitwiseNot(m);
+ }
+
+ Operand e = context.BitwiseExclusiveOr(d, n);
+
+ e = context.BitwiseAnd(e, m);
+ e = context.BitwiseExclusiveOr(e, d);
+
+ res = EmitVectorInsert(context, res, e, index, 3);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
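+ // BSL selects bits from n where d is set and from m where d is clear,
+ // computed branch-free as ((n ^ m) & d) ^ m.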
+ public static void Bsl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BslV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.BitwiseExclusiveOr(
+ context.BitwiseAnd(op1,
+ context.BitwiseExclusiveOr(op2, op3)), op3);
+ });
+ }
+ }
+
+ public static void Eor_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64EorV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2));
+ }
+ }
+
+ public static void Not_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAvx512Ortho)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
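+ // The VPTERNLOGD immediate is an 8-bit truth table; 0b10101010 is the column for
+ // one source operand, so its complement computes the bitwise NOT (both sources are n).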
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, n, Const(~0b10101010));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpZx(context, (op1) => context.BitwiseNot(op1));
+ }
+ }
+
+ public static void Orn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV);
+ }
+ else if (Optimizations.UseAvx512Ortho)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
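+ // Truth-table immediate: 0b11001100 is the column for n and ~0b10101010 the
+ // complemented column for m, so the OR of the two encodes n | ~m in one VPTERNLOGD.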
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.BitwiseOr(op1, context.BitwiseNot(op2));
+ });
+ }
+ }
+
+ public static void Orr_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrrV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, op2));
+ }
+ }
+
+ public static void Orr_Vi(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+ Operand imm = eSize switch
+ {
+ 16 => X86GetAllElements(context, (short)op.Immediate),
+ 32 => X86GetAllElements(context, (int)op.Immediate),
+ _ => throw new InvalidOperationException($"Invalid element size {eSize}.")
+ };
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, d, imm);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorImmBinaryOp(context, (op1, op2) => context.BitwiseOr(op1, op2));
+ }
+ }
+
+ public static void Rbit_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (Optimizations.UseGfni)
+ {
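+ // GF2P8AFFINEQB multiplies each byte by this anti-diagonal bit matrix, which
+ // reverses the bit order within every byte; the final Const(0) is the affine XOR term.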
+ const long bitMatrix =
+ (0b10000000L << 56) |
+ (0b01000000L << 48) |
+ (0b00100000L << 40) |
+ (0b00010000L << 32) |
+ (0b00001000L << 24) |
+ (0b00000100L << 16) |
+ (0b00000010L << 8) |
+ (0b00000001L << 0);
+
+ Operand vBitMatrix = X86GetAllElements(context, bitMatrix);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, GetVec(op.Rn), vBitMatrix, Const(0));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
+
+ Operand de = EmitReverseBits8Op(context, ne);
+
+ res = EmitVectorInsert(context, res, de, index, 0);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
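+ // Reverses the bits of the low byte by swapping adjacent bits, then bit pairs,
+ // then nibbles, using the usual mask-and-shift construction.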
+ private static Operand EmitReverseBits8Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
+ Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaul)), Const(1)),
+ context.ShiftLeft (context.BitwiseAnd(op, Const(0x55ul)), Const(1)));
+
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccul)), Const(2)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x33ul)), Const(2)));
+
+ return context.BitwiseOr(context.ShiftRightUI(val, Const(4)),
+ context.ShiftLeft (context.BitwiseAnd(val, Const(0x0ful)), Const(4)));
+ }
+
+ public static void Rev16_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
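+ // Each mask byte is a PSHUFB source index; swapping the indices pairwise
+ // reverses the two bytes inside every 16-bit element.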
+ const long maskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0;
+ const long maskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0;
+
+ Operand mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitRev_V(context, containerSize: 1);
+ }
+ }
+
+ public static void Rev32_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand mask;
+
+ if (op.Size == 0)
+ {
+ const long maskE0 = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0;
+ const long maskE1 = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+ else /* if (op.Size == 1) */
+ {
+ const long maskE0 = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0;
+ const long maskE1 = 13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitRev_V(context, containerSize: 2);
+ }
+ }
+
+ public static void Rev64_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand mask;
+
+ if (op.Size == 0)
+ {
+ const long maskE0 = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0;
+ const long maskE1 = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+ else if (op.Size == 1)
+ {
+ const long maskE0 = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0;
+ const long maskE1 = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+ else /* if (op.Size == 2) */
+ {
+ const long maskE0 = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0;
+ const long maskE1 = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0;
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitRev_V(context, containerSize: 3);
+ }
+ }
+
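+ // Reverses the element order within each container: containerMask has
+ // (containerSize - op.Size) low bits set, so XOR-ing the index with it mirrors
+ // the element position inside its container while leaving container order intact.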
+ private static void EmitRev_V(ArmEmitterContext context, int containerSize)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ int containerMask = (1 << (containerSize - op.Size)) - 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ int revIndex = index ^ containerMask;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, revIndex, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
new file mode 100644
index 00000000..68ef4ed1
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
@@ -0,0 +1,266 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Vand_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64AndV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pand, n, m));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2));
+ }
+ }
+
+ public static void Vbic_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64BicV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pandn, m, n));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, context.BitwiseNot(op2)));
+ }
+ }
+
+ public static void Vbic_II(ArmEmitterContext context)
+ {
+ OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp;
+
+ long immediate = op.Immediate;
+
+ // Replicate the field across the full 64 bits when the element size is smaller than 64 bits.
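+ // For example, with size 0 an immediate of 0xAB becomes 0xABABABABABABABAB.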
+ switch (op.Size)
+ {
+ case 0: immediate *= 0x0101010101010101L; break;
+ case 1: immediate *= 0x0001000100010001L; break;
+ case 2: immediate *= 0x0000000100000001L; break;
+ }
+
+ Operand imm = Const(immediate);
+ Operand res = GetVecA32(op.Qd);
+
+ if (op.Q)
+ {
+ for (int elem = 0; elem < 2; elem++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Qd, elem, 3);
+
+ res = EmitVectorInsert(context, res, context.BitwiseAnd(de, context.BitwiseNot(imm)), elem, 3);
+ }
+ }
+ else
+ {
+ Operand de = EmitVectorExtractZx(context, op.Qd, op.Vd & 1, 3);
+
+ res = EmitVectorInsert(context, res, context.BitwiseAnd(de, context.BitwiseNot(imm)), op.Vd & 1, 3);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vbif(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BifV | Intrinsic.Arm64V128, d, n, m));
+ }
+ else
+ {
+ EmitBifBit(context, true);
+ }
+ }
+
+ public static void Vbit(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BitV | Intrinsic.Arm64V128, d, n, m));
+ }
+ else
+ {
+ EmitBifBit(context, false);
+ }
+ }
+
+ public static void Vbsl(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BslV | Intrinsic.Arm64V128, d, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
+ return context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
+ });
+ }
+ else
+ {
+ EmitVectorTernaryOpZx32(context, (op1, op2, op3) =>
+ {
+ return context.BitwiseExclusiveOr(
+ context.BitwiseAnd(op1,
+ context.BitwiseExclusiveOr(op2, op3)), op3);
+ });
+ }
+ }
+
+ public static void Veor_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64EorV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pxor, n, m));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2));
+ }
+ }
+
+ public static void Vorn_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseAvx512Ortho)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
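+ // Same truth-table immediate as the A64 Orn_V path: encodes n | ~m.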
+ return context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010));
+ });
+ }
+ else if (Optimizations.UseSse2)
+ {
+ Operand mask = context.VectorOne();
+
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask);
+ return context.AddIntrinsic(Intrinsic.X86Por, n, m);
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, context.BitwiseNot(op2)));
+ }
+ }
+
+ public static void Vorr_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrrV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Por, n, m));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2));
+ }
+ }
+
+ public static void Vorr_II(ArmEmitterContext context)
+ {
+ OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp;
+
+ long immediate = op.Immediate;
+
+ // Replicate the field across the full 64 bits when the element size is smaller than 64 bits.
+ switch (op.Size)
+ {
+ case 0: immediate *= 0x0101010101010101L; break;
+ case 1: immediate *= 0x0001000100010001L; break;
+ case 2: immediate *= 0x0000000100000001L; break;
+ }
+
+ Operand imm = Const(immediate);
+ Operand res = GetVecA32(op.Qd);
+
+ if (op.Q)
+ {
+ for (int elem = 0; elem < 2; elem++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Qd, elem, 3);
+
+ res = EmitVectorInsert(context, res, context.BitwiseOr(de, imm), elem, 3);
+ }
+ }
+ else
+ {
+ Operand de = EmitVectorExtractZx(context, op.Qd, op.Vd & 1, 3);
+
+ res = EmitVectorInsert(context, res, context.BitwiseOr(de, imm), op.Vd & 1, 3);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
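+ // VTST: each element becomes all ones when (n & m) is non-zero, and zero otherwise.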
+ public static void Vtst(ArmEmitterContext context)
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) =>
+ {
+ Operand isZero = context.ICompareEqual(context.BitwiseAnd(op1, op2), Const(0));
+ return context.ConditionalSelect(isZero, Const(0), Const(-1));
+ });
+ }
+
+ private static void EmitBifBit(ArmEmitterContext context, bool notRm)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
+ res = context.AddIntrinsic((notRm) ? Intrinsic.X86Pandn : Intrinsic.X86Pand, m, res);
+ return context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
+ });
+ }
+ else
+ {
+ EmitVectorTernaryOpZx32(context, (d, n, m) =>
+ {
+ if (notRm)
+ {
+ m = context.BitwiseNot(m);
+ }
+ return context.BitwiseExclusiveOr(
+ context.BitwiseAnd(m,
+ context.BitwiseExclusiveOr(d, n)), d);
+ });
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMemory.cs b/src/ARMeilleure/Instructions/InstEmitSimdMemory.cs
new file mode 100644
index 00000000..9b19872a
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdMemory.cs
@@ -0,0 +1,160 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Ld__Vms(ArmEmitterContext context)
+ {
+ EmitSimdMemMs(context, isLoad: true);
+ }
+
+ public static void Ld__Vss(ArmEmitterContext context)
+ {
+ EmitSimdMemSs(context, isLoad: true);
+ }
+
+ public static void St__Vms(ArmEmitterContext context)
+ {
+ EmitSimdMemMs(context, isLoad: false);
+ }
+
+ public static void St__Vss(ArmEmitterContext context)
+ {
+ EmitSimdMemSs(context, isLoad: false);
+ }
+
+ private static void EmitSimdMemMs(ArmEmitterContext context, bool isLoad)
+ {
+ OpCodeSimdMemMs op = (OpCodeSimdMemMs)context.CurrOp;
+
+ Operand n = GetIntOrSP(context, op.Rn);
+
+ long offset = 0;
+
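+ // Structures are interleaved in memory: iterating reps, then elements, then
+ // structure members makes consecutive addresses map to the same element index
+ // across consecutive registers, de-interleaving on load (and re-interleaving on store).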
+ for (int rep = 0; rep < op.Reps; rep++)
+ for (int elem = 0; elem < op.Elems; elem++)
+ for (int sElem = 0; sElem < op.SElems; sElem++)
+ {
+ int rtt = (op.Rt + rep + sElem) & 0x1f;
+
+ Operand tt = GetVec(rtt);
+
+ Operand address = context.Add(n, Const(offset));
+
+ if (isLoad)
+ {
+ EmitLoadSimd(context, address, tt, rtt, elem, op.Size);
+
+ if (op.RegisterSize == RegisterSize.Simd64 && elem == op.Elems - 1)
+ {
+ context.Copy(tt, context.VectorZeroUpper64(tt));
+ }
+ }
+ else
+ {
+ EmitStoreSimd(context, address, rtt, elem, op.Size);
+ }
+
+ offset += 1 << op.Size;
+ }
+
+ if (op.WBack)
+ {
+ EmitSimdMemWBack(context, offset);
+ }
+ }
+
+ private static void EmitSimdMemSs(ArmEmitterContext context, bool isLoad)
+ {
+ OpCodeSimdMemSs op = (OpCodeSimdMemSs)context.CurrOp;
+
+ Operand n = GetIntOrSP(context, op.Rn);
+
+ long offset = 0;
+
+ if (op.Replicate)
+ {
+ // Only loads use the replicate mode.
+ Debug.Assert(isLoad, "Replicate mode is not valid for stores.");
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int sElem = 0; sElem < op.SElems; sElem++)
+ {
+ int rt = (op.Rt + sElem) & 0x1f;
+
+ Operand t = GetVec(rt);
+
+ Operand address = context.Add(n, Const(offset));
+
+ for (int index = 0; index < elems; index++)
+ {
+ EmitLoadSimd(context, address, t, rt, index, op.Size);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ context.Copy(t, context.VectorZeroUpper64(t));
+ }
+
+ offset += 1 << op.Size;
+ }
+ }
+ else
+ {
+ for (int sElem = 0; sElem < op.SElems; sElem++)
+ {
+ int rt = (op.Rt + sElem) & 0x1f;
+
+ Operand t = GetVec(rt);
+
+ Operand address = context.Add(n, Const(offset));
+
+ if (isLoad)
+ {
+ EmitLoadSimd(context, address, t, rt, op.Index, op.Size);
+ }
+ else
+ {
+ EmitStoreSimd(context, address, rt, op.Index, op.Size);
+ }
+
+ offset += 1 << op.Size;
+ }
+ }
+
+ if (op.WBack)
+ {
+ EmitSimdMemWBack(context, offset);
+ }
+ }
+
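+ // Post-indexed writeback: Rm == ZR encodes an immediate advance by the total
+ // transfer size; otherwise the register value is added to the base.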
+ private static void EmitSimdMemWBack(ArmEmitterContext context, long offset)
+ {
+ OpCodeMemReg op = (OpCodeMemReg)context.CurrOp;
+
+ Operand n = GetIntOrSP(context, op.Rn);
+ Operand m;
+
+ if (op.Rm != RegisterAlias.Zr)
+ {
+ m = GetIntOrZR(context, op.Rm);
+ }
+ else
+ {
+ m = Const(offset);
+ }
+
+ context.Copy(n, context.Add(n, m));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs b/src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs
new file mode 100644
index 00000000..b774bd06
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs
@@ -0,0 +1,352 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Vld1(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 1, true);
+ }
+
+ public static void Vld2(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 2, true);
+ }
+
+ public static void Vld3(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 3, true);
+ }
+
+ public static void Vld4(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 4, true);
+ }
+
+ public static void Vst1(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 1, false);
+ }
+
+ public static void Vst2(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 2, false);
+ }
+
+ public static void Vst3(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 3, false);
+ }
+
+ public static void Vst4(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 4, false);
+ }
+
+ public static void EmitVStoreOrLoadN(ArmEmitterContext context, int count, bool load)
+ {
+ if (context.CurrOp is OpCode32SimdMemSingle)
+ {
+ OpCode32SimdMemSingle op = (OpCode32SimdMemSingle)context.CurrOp;
+
+ int eBytes = 1 << op.Size;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ // TODO: Check alignment.
+ int offset = 0;
+ int d = op.Vd;
+
+ for (int i = 0; i < count; i++)
+ {
+ // Accesses an element of a doubleword (D) SIMD register.
+ Operand address = context.Add(n, Const(offset));
+ if (eBytes == 8)
+ {
+ if (load)
+ {
+ EmitDVectorLoad(context, address, d);
+ }
+ else
+ {
+ EmitDVectorStore(context, address, d);
+ }
+ }
+ else
+ {
+ int index = ((d & 1) << (3 - op.Size)) + op.Index;
+ if (load)
+ {
+ if (op.Replicate)
+ {
+ int regs = (count > 1) ? 1 : op.Increment;
+ for (int reg = 0; reg < regs; reg++)
+ {
+ int dreg = reg + d;
+ int rIndex = ((dreg & 1) << (3 - op.Size));
+ int limit = rIndex + (1 << (3 - op.Size));
+
+ while (rIndex < limit)
+ {
+ EmitLoadSimd(context, address, GetVecA32(dreg >> 1), dreg >> 1, rIndex++, op.Size);
+ }
+ }
+ }
+ else
+ {
+ EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size);
+ }
+ }
+ else
+ {
+ EmitStoreSimd(context, address, d >> 1, index, op.Size);
+ }
+ }
+ offset += eBytes;
+ d += op.Increment;
+ }
+
+ if (op.WBack)
+ {
+ if (op.RegisterIndex)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+ SetIntA32(context, op.Rn, context.Add(n, m));
+ }
+ else
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(count * eBytes)));
+ }
+ }
+ }
+ else
+ {
+ OpCode32SimdMemPair op = (OpCode32SimdMemPair)context.CurrOp;
+
+ int increment = count > 1 ? op.Increment : 1;
+ int eBytes = 1 << op.Size;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+ int offset = 0;
+ int d = op.Vd;
+
+ for (int reg = 0; reg < op.Regs; reg++)
+ {
+ for (int elem = 0; elem < op.Elems; elem++)
+ {
+ int elemD = d + reg;
+ for (int i = 0; i < count; i++)
+ {
+ // Accesses an element of a doubleword (D) SIMD register,
+ // advancing the address by eBytes for each element.
+ Operand address = context.Add(n, Const(offset));
+ int index = ((elemD & 1) << (3 - op.Size)) + elem;
+ if (eBytes == 8)
+ {
+ if (load)
+ {
+ EmitDVectorLoad(context, address, elemD);
+ }
+ else
+ {
+ EmitDVectorStore(context, address, elemD);
+ }
+ }
+ else
+ {
+ if (load)
+ {
+ EmitLoadSimd(context, address, GetVecA32(elemD >> 1), elemD >> 1, index, op.Size);
+ }
+ else
+ {
+ EmitStoreSimd(context, address, elemD >> 1, index, op.Size);
+ }
+ }
+
+ offset += eBytes;
+ elemD += increment;
+ }
+ }
+ }
+
+ if (op.WBack)
+ {
+ if (op.RegisterIndex)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+ SetIntA32(context, op.Rn, context.Add(n, m));
+ }
+ else
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(count * 8 * op.Regs)));
+ }
+ }
+ }
+ }
+
+ public static void Vldm(ArmEmitterContext context)
+ {
+ OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ bool writeBack = op.PostOffset != 0;
+
+ if (writeBack)
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ int range = op.RegisterRange;
+
+ int sReg = (op.DoubleWidth) ? (op.Vd << 1) : op.Vd;
+ int offset = 0;
+ int byteSize = 4;
+
+ for (int num = 0; num < range; num++, sReg++)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+ Operand vec = GetVecA32(sReg >> 2);
+
+ EmitLoadSimd(context, address, vec, sReg >> 2, sReg & 3, WordSizeLog2);
+ offset += byteSize;
+ }
+ }
+
+ public static void Vstm(ArmEmitterContext context)
+ {
+ OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ bool writeBack = op.PostOffset != 0;
+
+ if (writeBack)
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ int offset = 0;
+
+ int range = op.RegisterRange;
+ int sReg = (op.DoubleWidth) ? (op.Vd << 1) : op.Vd;
+ int byteSize = 4;
+
+ for (int num = 0; num < range; num++, sReg++)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+
+ EmitStoreSimd(context, address, sReg >> 2, sReg & 3, WordSizeLog2);
+
+ offset += byteSize;
+ }
+ }
+
+ public static void Vldr(ArmEmitterContext context)
+ {
+ EmitVLoadOrStore(context, AccessType.Load);
+ }
+
+ public static void Vstr(ArmEmitterContext context)
+ {
+ EmitVLoadOrStore(context, AccessType.Store);
+ }
+
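+ // A D register is accessed as two 32-bit words; when the E flag (big-endian data)
+ // is set, the two words are written in swapped order.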
+ private static void EmitDVectorStore(ArmEmitterContext context, Operand address, int vecD)
+ {
+ int vecQ = vecD >> 1;
+ int vecSElem = (vecD & 1) << 1;
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ EmitStoreSimd(context, address, vecQ, vecSElem, WordSizeLog2);
+ EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem | 1, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ EmitStoreSimd(context, address, vecQ, vecSElem | 1, WordSizeLog2);
+ EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitDVectorLoad(ArmEmitterContext context, Operand address, int vecD)
+ {
+ int vecQ = vecD >> 1;
+ int vecSElem = (vecD & 1) << 1;
+ Operand vec = GetVecA32(vecQ);
+
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ EmitLoadSimd(context, address, vec, vecQ, vecSElem, WordSizeLog2);
+ EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem | 1, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ EmitLoadSimd(context, address, vec, vecQ, vecSElem | 1, WordSizeLog2);
+ EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitVLoadOrStore(ArmEmitterContext context, AccessType accType)
+ {
+ OpCode32SimdMemImm op = (OpCode32SimdMemImm)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+ Operand m = GetMemM(context, setCarry: false);
+
+ Operand address = op.Add
+ ? context.Add(n, m)
+ : context.Subtract(n, m);
+
+ int size = op.Size;
+
+ if ((accType & AccessType.Load) != 0)
+ {
+ if (size == DWordSizeLog2)
+ {
+ EmitDVectorLoad(context, address, op.Vd);
+ }
+ else
+ {
+ Operand vec = GetVecA32(op.Vd >> 2);
+ EmitLoadSimd(context, address, vec, op.Vd >> 2, (op.Vd & 3) << (2 - size), size);
+ }
+ }
+ else
+ {
+ if (size == DWordSizeLog2)
+ {
+ EmitDVectorStore(context, address, op.Vd);
+ }
+ else
+ {
+ EmitStoreSimd(context, address, op.Vd >> 2, (op.Vd & 3) << (2 - size), size);
+ }
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMove.cs b/src/ARMeilleure/Instructions/InstEmitSimdMove.cs
new file mode 100644
index 00000000..b58a32f6
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdMove.cs
@@ -0,0 +1,850 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ #region "Masks"
+ private static readonly long[] _masksE0_Uzp = new long[]
+ {
+ 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0
+ };
+
+ private static readonly long[] _masksE1_Uzp = new long[]
+ {
+ 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
+ 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0
+ };
+ #endregion
+
+ public static void Dup_Gp(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ if (Optimizations.UseSse2)
+ {
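+ // Multiplying the zero-extended value by 0x01010101 (or 0x00010001) replicates it
+ // into every byte (or halfword) of the 32-bit lane before the vector broadcast below.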
+ switch (op.Size)
+ {
+ case 0: n = context.ZeroExtend8 (n.Type, n); n = context.Multiply(n, Const(n.Type, 0x01010101)); break;
+ case 1: n = context.ZeroExtend16(n.Type, n); n = context.Multiply(n, Const(n.Type, 0x00010001)); break;
+ case 2: n = context.ZeroExtend32(n.Type, n); break;
+ }
+
+ Operand res = context.VectorInsert(context.VectorZero(), n, 0);
+
+ if (op.Size < 3)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0xf0));
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, n, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Dup_S(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), ne, 0, op.Size));
+ }
+
+ public static void Dup_V(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand res = GetVec(op.Rn);
+
+ if (op.Size == 0)
+ {
+ if (op.DstIndex != 0)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex));
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Punpcklbw, res, res);
+ res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+ else if (op.Size == 1)
+ {
+ if (op.DstIndex != 0)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex * 2));
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+ else if (op.Size == 2)
+ {
+ int mask = op.DstIndex * 0b01010101;
+
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(mask));
+ }
+ else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+ }
+ else if (op.DstIndex == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Movhlps, res, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Ext_V(ArmEmitterContext context)
+ {
+ OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand nShifted = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ nShifted = context.VectorZeroUpper64(nShifted);
+ }
+
+ nShifted = context.AddIntrinsic(Intrinsic.X86Psrldq, nShifted, Const(op.Imm4));
+
+ Operand mShifted = GetVec(op.Rm);
+
+ mShifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ mShifted = context.VectorZeroUpper64(mShifted);
+ }
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, mShifted);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int bytes = op.GetBytesCount();
+
+ int position = op.Imm4 & (bytes - 1);
+
+ for (int index = 0; index < bytes; index++)
+ {
+ int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;
+
+ Operand e = EmitVectorExtractZx(context, reg, position, 0);
+
+ position = (position + 1) & (bytes - 1);
+
+ res = EmitVectorInsert(context, res, e, index, 0);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fcsel_S(ArmEmitterContext context)
+ {
+ OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
+
+ Operand lblTrue = Label();
+ Operand lblEnd = Label();
+
+ Operand isTrue = InstEmitFlowHelper.GetCondTrue(context, op.Cond);
+
+ context.BranchIfTrue(lblTrue, isTrue);
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), me, 0));
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblTrue);
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
+
+ context.MarkLabel(lblEnd);
+ }
+
+ public static void Fmov_Ftoi(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2);
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ public static void Fmov_Ftoi1(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 1, 3);
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ public static void Fmov_Itof(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), n, 0, op.Size + 2));
+ }
+
+ public static void Fmov_Itof1(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ context.Copy(d, EmitVectorInsert(context, d, n, 1, 3));
+ }
+
+ public static void Fmov_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
+ }
+
+ public static void Fmov_Si(ArmEmitterContext context)
+ {
+ OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ if (op.Size == 0)
+ {
+ context.Copy(GetVec(op.Rd), X86GetScalar(context, (int)op.Immediate));
+ }
+ else
+ {
+ context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
+ }
+ }
+ else
+ {
+ Operand e = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ res = EmitVectorInsert(context, res, e, 0, op.Size + 2);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fmov_Vi(ArmEmitterContext context)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Copy(GetVec(op.Rd), X86GetAllElements(context, op.Immediate));
+ }
+ else
+ {
+ context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
+ }
+ }
+ else
+ {
+ Operand e = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, e, index, 3);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Ins_Gp(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ context.Copy(d, EmitVectorInsert(context, d, n, op.DstIndex, op.Size));
+ }
+
+ public static void Ins_V(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size);
+
+ context.Copy(d, EmitVectorInsert(context, d, ne, op.DstIndex, op.Size));
+ }
+
+ public static void Movi_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitSse2VectorMoviMvniOp(context, not: false);
+ }
+ else
+ {
+ EmitVectorImmUnaryOp(context, (op1) => op1);
+ }
+ }
+
+ public static void Mvni_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitSse2VectorMoviMvniOp(context, not: true);
+ }
+ else
+ {
+ EmitVectorImmUnaryOp(context, (op1) => context.BitwiseNot(op1));
+ }
+ }
+
+ public static void Smov_S(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand ne = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ ne = context.ZeroExtend32(OperandType.I64, ne);
+ }
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ public static void Tbl_V(ArmEmitterContext context)
+ {
+ EmitTableVectorLookup(context, isTbl: true);
+ }
+
+ public static void Tbx_V(ArmEmitterContext context)
+ {
+ EmitTableVectorLookup(context, isTbl: false);
+ }
+
+ public static void Trn1_V(ArmEmitterContext context)
+ {
+ EmitVectorTranspose(context, part: 0);
+ }
+
+ public static void Trn2_V(ArmEmitterContext context)
+ {
+ EmitVectorTranspose(context, part: 1);
+ }
+
+ public static void Umov_S(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ public static void Uzp1_V(ArmEmitterContext context)
+ {
+ EmitVectorUnzip(context, part: 0);
+ }
+
+ public static void Uzp2_V(ArmEmitterContext context)
+ {
+ EmitVectorUnzip(context, part: 1);
+ }
+
+ public static void Xtn_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand d = GetVec(op.Rd);
+
+ Operand res = context.VectorZeroUpper64(d);
+
+ Operand mask = X86GetAllElements(context, EvenMasks[op.Size]);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(op.Rn), mask);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+
+ res = context.AddIntrinsic(movInst, res, res2);
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+
+ res = EmitVectorInsert(context, res, ne, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+
+ public static void Zip1_V(ArmEmitterContext context)
+ {
+ EmitVectorZip(context, part: 0);
+ }
+
+ public static void Zip2_V(ArmEmitterContext context)
+ {
+ EmitVectorZip(context, part: 1);
+ }
+
+ private static void EmitSse2VectorMoviMvniOp(ArmEmitterContext context, bool not)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ long imm = op.Immediate;
+
+ switch (op.Size)
+ {
+ case 0: imm *= 0x01010101; break;
+ case 1: imm *= 0x00010001; break;
+ }
+
+ if (not)
+ {
+ imm = ~imm;
+ }
+
+ Operand mask;
+
+ if (op.Size < 3)
+ {
+ mask = X86GetAllElements(context, (int)imm);
+ }
+ else
+ {
+ mask = X86GetAllElements(context, imm);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ mask = context.VectorZeroUpper64(mask);
+ }
+
+ context.Copy(GetVec(op.Rd), mask);
+ }
+
+ private static void EmitTableVectorLookup(ArmEmitterContext context, bool isTbl)
+ {
+ OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL);
+
+ // Fast path for single register table.
+ {
+ Operand n = GetVec(op.Rn);
+
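+ // Out-of-range indices must read as zero: PCMPGTB against 0x0F flags bytes above
+ // 15, and OR-ing the result into the index sets bit 7 (indices >= 0x80 already
+ // have it), which makes PSHUFB write zero for that lane.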
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);
+ }
+
+ for (int index = 1; index < op.Size; index++)
+ {
+ Operand ni = GetVec((op.Rn + index) & 0x1F);
+
+ Operand idxMask = X86GetAllElements(context, 0x1010101010101010L * index);
+
+ Operand mSubMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, idxMask);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mSubMask, mask);
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, mSubMask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+ }
+
+ if (!isTbl)
+ {
+ Operand idxMask = X86GetAllElements(context, (0x1010101010101010L * op.Size) - 0x0101010101010101L);
+ Operand zeroMask = context.VectorZero();
+
+ Operand mPosMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, idxMask);
+ Operand mNegMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, zeroMask, m);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Por, mPosMask, mNegMask);
+
+ Operand dMask = context.AddIntrinsic(Intrinsic.X86Pand, d, mMask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, dMask);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ Operand d = GetVec(op.Rd);
+
+ List<Operand> args = new List<Operand>();
+
+ if (!isTbl)
+ {
+ args.Add(d);
+ }
+
+ args.Add(GetVec(op.Rm));
+
+ args.Add(Const(op.RegisterSize == RegisterSize.Simd64 ? 8 : 16));
+
+ for (int index = 0; index < op.Size; index++)
+ {
+ args.Add(GetVec((op.Rn + index) & 0x1F));
+ }
+
+ MethodInfo info = null;
+
+ if (isTbl)
+ {
+ switch (op.Size)
+ {
+ case 1: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl1)); break;
+ case 2: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl2)); break;
+ case 3: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl3)); break;
+ case 4: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl4)); break;
+ }
+ }
+ else
+ {
+ switch (op.Size)
+ {
+ case 1: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx1)); break;
+ case 2: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx2)); break;
+ case 3: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx3)); break;
+ case 4: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx4)); break;
+ }
+ }
+
+ context.Copy(d, context.Call(info, args.ToArray()));
+ }
+ }
+
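+ // For sizes below 3, PSHUFB packs the even-indexed elements into the low half and
+ // the odd-indexed elements into the high half of each source; PUNPCKL/PUNPCKH then
+ // interleaves the matching halves of n and m, producing TRN1/TRN2 directly.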
+ private static void EmitVectorTranspose(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand mask = default;
+
+ if (op.Size < 3)
+ {
+ long maskE0 = EvenMasks[op.Size];
+ long maskE1 = OddMasks [op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size < 3)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+ }
+
+ Operand m = GetVec(op.Rm);
+
+ if (op.Size < 3)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Intrinsic punpckInst = part == 0
+ ? X86PunpcklInstruction[op.Size]
+ : X86PunpckhInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, pairIndex + part, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, pairIndex + part, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
+ res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
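+ // UZP keeps the even (part 0) or odd (part 1) indexed elements of each source,
+ // placing n's in the low half and m's in the high half; the SSSE3 path
+ // de-interleaves with PSHUFB and recombines with PUNPCKLQDQ/PUNPCKHQDQ.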
+ private static void EmitVectorUnzip(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Operand mask = default;
+
+ if (op.Size < 3)
+ {
+ long maskE0 = EvenMasks[op.Size];
+ long maskE1 = OddMasks [op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size < 3)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+ }
+
+ Operand m = GetVec(op.Rm);
+
+ if (op.Size < 3)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic punpcklInst = X86PunpcklInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpcklInst, n, m);
+
+ if (op.Size < 2)
+ {
+ long maskE0 = _masksE0_Uzp[op.Size];
+ long maskE1 = _masksE1_Uzp[op.Size];
+
+ Operand mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask);
+ }
+
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ res = context.AddIntrinsic(punpckInst, res, context.VectorZero());
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int idx = index << 1;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ res = EmitVectorInsert(context, res, me, pairs + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitVectorZip(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Intrinsic punpckInst = part == 0
+ ? X86PunpcklInstruction[op.Size]
+ : X86PunpckhInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m);
+
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ res = context.AddIntrinsic(punpckInst, res, context.VectorZero());
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ int baseIndex = part != 0 ? pairs : 0;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
+ res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs
new file mode 100644
index 00000000..b8b91b31
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs
@@ -0,0 +1,656 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ #region "Masks"
+ // Same as InstEmitSimdMove, as the instructions do the same thing.
+ private static readonly long[] _masksE0_Uzp = new long[]
+ {
+ 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0
+ };
+
+ private static readonly long[] _masksE1_Uzp = new long[]
+ {
+ 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
+ 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0
+ };
+ #endregion
+
+ public static void Vmov_I(ArmEmitterContext context)
+ {
+ EmitVectorImmUnaryOp32(context, (op1) => op1);
+ }
+
+ public static void Vmvn_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAvx512Ortho)
+ {
+ EmitVectorUnaryOpSimd32(context, (op1) =>
+ {
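+ // 0b01010101 is the complement of the source's truth-table column, i.e. NOT op1.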
+ return context.AddIntrinsic(Intrinsic.X86Vpternlogd, op1, op1, Const(0b01010101));
+ });
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (op1) =>
+ {
+ Operand mask = X86GetAllElements(context, -1L);
+ return context.AddIntrinsic(Intrinsic.X86Pandn, op1, mask);
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => context.BitwiseNot(op1));
+ }
+ }
+
+ public static void Vmvn_II(ArmEmitterContext context)
+ {
+ EmitVectorImmUnaryOp32(context, (op1) => context.BitwiseNot(op1));
+ }
+
+ public static void Vmov_GS(ArmEmitterContext context)
+ {
+ OpCode32SimdMovGp op = (OpCode32SimdMovGp)context.CurrOp;
+
+ Operand vec = GetVecA32(op.Vn >> 2);
+ if (op.Op == 1)
+ {
+ // To general purpose.
+ Operand value = context.VectorExtract(OperandType.I32, vec, op.Vn & 0x3);
+ SetIntA32(context, op.Rt, value);
+ }
+ else
+ {
+ // From general purpose.
+ Operand value = GetIntA32(context, op.Rt);
+ context.Copy(vec, context.VectorInsert(vec, value, op.Vn & 0x3));
+ }
+ }
+
+ public static void Vmov_G1(ArmEmitterContext context)
+ {
+ OpCode32SimdMovGpElem op = (OpCode32SimdMovGpElem)context.CurrOp;
+
+ int index = op.Index + ((op.Vd & 1) << (3 - op.Size));
+ if (op.Op == 1)
+ {
+ // To general purpose.
+ Operand value = EmitVectorExtract32(context, op.Vd >> 1, index, op.Size, !op.U);
+ SetIntA32(context, op.Rt, value);
+ }
+ else
+ {
+ // From general purpose.
+ Operand vec = GetVecA32(op.Vd >> 1);
+ Operand value = GetIntA32(context, op.Rt);
+ context.Copy(vec, EmitVectorInsert(context, vec, value, index, op.Size));
+ }
+ }
+
+ public static void Vmov_G2(ArmEmitterContext context)
+ {
+ OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp;
+
+ Operand vec = GetVecA32(op.Vm >> 2);
+ int vm1 = op.Vm + 1;
+ bool sameOwnerVec = (op.Vm >> 2) == (vm1 >> 2);
+ Operand vec2 = sameOwnerVec ? vec : GetVecA32(vm1 >> 2);
+ if (op.Op == 1)
+ {
+ // To general purpose.
+ Operand lowValue = context.VectorExtract(OperandType.I32, vec, op.Vm & 3);
+ SetIntA32(context, op.Rt, lowValue);
+
+ Operand highValue = context.VectorExtract(OperandType.I32, vec2, vm1 & 3);
+ SetIntA32(context, op.Rt2, highValue);
+ }
+ else
+ {
+ // From general purpose.
+ Operand lowValue = GetIntA32(context, op.Rt);
+ Operand resultVec = context.VectorInsert(vec, lowValue, op.Vm & 3);
+
+ Operand highValue = GetIntA32(context, op.Rt2);
+
+ if (sameOwnerVec)
+ {
+ context.Copy(vec, context.VectorInsert(resultVec, highValue, vm1 & 3));
+ }
+ else
+ {
+ context.Copy(vec, resultVec);
+ context.Copy(vec2, context.VectorInsert(vec2, highValue, vm1 & 3));
+ }
+ }
+ }
+
+ public static void Vmov_GD(ArmEmitterContext context)
+ {
+ OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp;
+
+ Operand vec = GetVecA32(op.Vm >> 1);
+ if (op.Op == 1)
+ {
+ // To general purpose.
+ Operand value = context.VectorExtract(OperandType.I64, vec, op.Vm & 1);
+ SetIntA32(context, op.Rt, context.ConvertI64ToI32(value));
+ SetIntA32(context, op.Rt2, context.ConvertI64ToI32(context.ShiftRightUI(value, Const(32))));
+ }
+ else
+ {
+ // From general purpose.
+ Operand lowValue = GetIntA32(context, op.Rt);
+ Operand highValue = GetIntA32(context, op.Rt2);
+
+ Operand value = context.BitwiseOr(
+ context.ZeroExtend32(OperandType.I64, lowValue),
+ context.ShiftLeft(context.ZeroExtend32(OperandType.I64, highValue), Const(32)));
+
+ context.Copy(vec, context.VectorInsert(vec, value, op.Vm & 1));
+ }
+ }
+
+ public static void Vmovl(ArmEmitterContext context)
+ {
+ OpCode32SimdLong op = (OpCode32SimdLong)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U);
+
+ if (op.Size == 2)
+ {
+ if (op.U)
+ {
+ me = context.ZeroExtend32(OperandType.I64, me);
+ }
+ else
+ {
+ me = context.SignExtend32(OperandType.I64, me);
+ }
+ }
+
+ res = EmitVectorInsert(context, res, me, index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vtbl(ArmEmitterContext context)
+ {
+ OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;
+
+ bool extension = op.Opc == 1;
+ int length = op.Length + 1;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand d = GetVecA32(op.Qd);
+ Operand m = EmitMoveDoubleWordToSide(context, GetVecA32(op.Qm), op.Vm, 0);
+
+ Operand res;
+ Operand mask = X86GetAllElements(context, 0x0707070707070707L);
+
+ // First table register (also the fast path when the table is a single register).
+ {
+ Operand n = EmitMoveDoubleWordToSide(context, GetVecA32(op.Qn), op.Vn, 0);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);
+ }
+
+ for (int index = 1; index < length; index++)
+ {
+ int newVn = (op.Vn + index) & 0x1F;
+ (int qn, int ind) = GetQuadwordAndSubindex(newVn, op.RegisterSize);
+ Operand ni = EmitMoveDoubleWordToSide(context, GetVecA32(qn), newVn, 0);
+
+ Operand idxMask = X86GetAllElements(context, 0x0808080808080808L * index);
+
+ Operand mSubMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, idxMask);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mSubMask, mask);
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, mSubMask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+ }
+
+ if (extension)
+ {
+ Operand idxMask = X86GetAllElements(context, (0x0808080808080808L * length) - 0x0101010101010101L);
+ Operand zeroMask = context.VectorZero();
+
+ Operand mPosMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, idxMask);
+ Operand mNegMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, zeroMask, m);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Por, mPosMask, mNegMask);
+
+ Operand dMask = context.AddIntrinsic(Intrinsic.X86Pand, EmitMoveDoubleWordToSide(context, d, op.Vd, 0), mMask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, dMask);
+ }
+
+ res = EmitMoveDoubleWordToSide(context, res, 0, op.Vd);
+
+ context.Copy(d, EmitDoubleWordInsert(context, d, res, op.Vd));
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+
+ (int Qx, int Ix)[] tableTuples = new (int, int)[length];
+ for (int i = 0; i < length; i++)
+ {
+ tableTuples[i] = GetQuadwordAndSubindex(op.Vn + i, op.RegisterSize);
+ }
+
+ int byteLength = length * 8;
+
+ Operand res = GetVecA32(op.Qd);
+ Operand m = GetVecA32(op.Qm);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + op.Im));
+
+ Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength));
+ Operand elemRes = default; // Note: This is I64 for ease of calculation.
+
+ // TODO: Branching rather than conditional select.
+
+ // Get the indexed byte.
+ // To simplify the IL, we get bytes from every vector and use a nested conditional
+ // select to choose the right result.
+ // This does have to extract `length` times for every element, but it is certainly
+ // not as bad as it could be.
+
+ // Which vector number the index falls on.
+ Operand vecIndex = context.ShiftRightUI(selectedIndex, Const(3));
+ // The amount to shift by to extract that byte.
+ Operand subVecIndexShift = context.ShiftLeft(context.BitwiseAnd(selectedIndex, Const(7)), Const(3));
+
+ for (int i = 0; i < length; i++)
+ {
+ (int qx, int ix) = tableTuples[i];
+ // Get the whole vector; we'll extract a byte from it.
+ Operand lookupResult;
+ if (qx == op.Qd)
+ {
+ // Result contains the current state of the vector.
+ lookupResult = context.VectorExtract(OperandType.I64, res, ix);
+ }
+ else
+ {
+ lookupResult = EmitVectorExtract32(context, qx, ix, 3, false); // I64
+ }
+
+ lookupResult = context.ShiftRightUI(lookupResult, subVecIndexShift); // Get the relevant byte from this vector.
+
+ if (i == 0)
+ {
+ elemRes = lookupResult; // First lookup initializes the result; no select needed.
+ }
+ else
+ {
+ Operand isThisElem = context.ICompareEqual(vecIndex, Const(i));
+ elemRes = context.ConditionalSelect(isThisElem, lookupResult, elemRes);
+ }
+ }
+
+ Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, op.Qd, index + op.Id, 0, false)) : Const(0L);
+
+ res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + op.Id, 0);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ }
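+
+ // Scalar reference (a sketch, not used by the emitter) for the VTBL/VTBX semantics
+ // both paths above implement: `table` holds the length * 8 table bytes, and
+ // `extension` selects VTBX (keep the destination byte) over VTBL (produce zero).
+ private static byte TblByteSketch(byte[] table, byte index, byte dest, bool extension)
+ {
+     return index < table.Length ? table[index] : (extension ? dest : (byte)0);
+ }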
+
+ public static void Vtrn(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitVectorShuffleOpSimd32(context, (m, d) =>
+ {
+ Operand mask = default;
+
+ if (op.Size < 3)
+ {
+ long maskE0 = EvenMasks[op.Size];
+ long maskE1 = OddMasks[op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ if (op.Size < 3)
+ {
+ d = context.AddIntrinsic(Intrinsic.X86Pshufb, d, mask);
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Operand resD = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m);
+ Operand resM = context.AddIntrinsic(X86PunpckhInstruction[op.Size], d, m);
+
+ return (resM, resD);
+ });
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ bool overlap = op.Qm == op.Qd;
+
+ Operand resD = GetVecA32(op.Qd);
+ Operand resM = GetVecA32(op.Qm);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+ Operand d2 = EmitVectorExtract32(context, op.Qd, pairIndex + 1 + op.Id, op.Size, false);
+ Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, false);
+
+ resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + op.Id, op.Size);
+
+ if (overlap)
+ {
+ resM = resD;
+ }
+
+ resM = EmitVectorInsert(context, resM, d2, pairIndex + op.Im, op.Size);
+
+ if (overlap)
+ {
+ resD = resM;
+ }
+ }
+
+ context.Copy(GetVecA32(op.Qd), resD);
+ if (!overlap)
+ {
+ context.Copy(GetVecA32(op.Qm), resM);
+ }
+ }
+ }
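+
+ // For reference: VTRN treats the registers as pairs of lanes and swaps the odd lane
+ // of Qd with the even lane of Qm in every pair, so resD = { d0, m0, d2, m2, ... }
+ // and resM = { d1, m1, d3, m3, ... } (lane order as in the scalar loop above).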
+
+ public static void Vzip(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Zip1V, Intrinsic.Arm64Zip2V);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorShuffleOpSimd32(context, (m, d) =>
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Operand resD = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m);
+ Operand resM = context.AddIntrinsic(X86PunpckhInstruction[op.Size], d, m);
+
+ return (resM, resD);
+ }
+ else
+ {
+ Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m);
+
+ Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, res, context.VectorZero());
+ Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, res, context.VectorZero());
+ return (resM, resD);
+ }
+ });
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ bool overlap = op.Qm == op.Qd;
+
+ Operand resD = GetVecA32(op.Qd);
+ Operand resM = GetVecA32(op.Qm);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+ Operand dRowD = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, false);
+ Operand mRowD = EmitVectorExtract32(context, op.Qm, index + op.Im, op.Size, false);
+
+ Operand dRowM = EmitVectorExtract32(context, op.Qd, index + op.Id + pairs, op.Size, false);
+ Operand mRowM = EmitVectorExtract32(context, op.Qm, index + op.Im + pairs, op.Size, false);
+
+ resD = EmitVectorInsert(context, resD, dRowD, pairIndex + op.Id, op.Size);
+ resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + op.Id, op.Size);
+
+ if (overlap)
+ {
+ resM = resD;
+ }
+
+ resM = EmitVectorInsert(context, resM, dRowM, pairIndex + op.Im, op.Size);
+ resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + op.Im, op.Size);
+
+ if (overlap)
+ {
+ resD = resM;
+ }
+ }
+
+ context.Copy(GetVecA32(op.Qd), resD);
+ if (!overlap)
+ {
+ context.Copy(GetVecA32(op.Qm), resM);
+ }
+ }
+ }
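+
+ // For reference: VZIP interleaves the two registers, so resD takes alternating
+ // lanes from the low halves of d and m, and resM from their high halves.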
+
+ public static void Vuzp(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Uzp1V, Intrinsic.Arm64Uzp2V);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitVectorShuffleOpSimd32(context, (m, d) =>
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Operand mask = default;
+
+ if (op.Size < 3)
+ {
+ long maskE0 = EvenMasks[op.Size];
+ long maskE1 = OddMasks[op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ d = context.AddIntrinsic(Intrinsic.X86Pshufb, d, mask);
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, d, m);
+ Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, d, m);
+
+ return (resM, resD);
+ }
+ else
+ {
+ Intrinsic punpcklInst = X86PunpcklInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpcklInst, d, m);
+
+ if (op.Size < 2)
+ {
+ long maskE0 = _masksE0_Uzp[op.Size];
+ long maskE1 = _masksE1_Uzp[op.Size];
+
+ Operand mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask);
+ }
+
+ Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, res, context.VectorZero());
+ Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, res, context.VectorZero());
+
+ return (resM, resD);
+ }
+ });
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ bool overlap = op.Qm == op.Qd;
+
+ Operand resD = GetVecA32(op.Qd);
+ Operand resM = GetVecA32(op.Qm);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand dIns, mIns;
+ if (index >= pairs)
+ {
+ int pairIndex = index - pairs;
+ dIns = EmitVectorExtract32(context, op.Qm, (pairIndex << 1) + op.Im, op.Size, false);
+ mIns = EmitVectorExtract32(context, op.Qm, ((pairIndex << 1) | 1) + op.Im, op.Size, false);
+ }
+ else
+ {
+ dIns = EmitVectorExtract32(context, op.Qd, (index << 1) + op.Id, op.Size, false);
+ mIns = EmitVectorExtract32(context, op.Qd, ((index << 1) | 1) + op.Id, op.Size, false);
+ }
+
+ resD = EmitVectorInsert(context, resD, dIns, index + op.Id, op.Size);
+
+ if (overlap)
+ {
+ resM = resD;
+ }
+
+ resM = EmitVectorInsert(context, resM, mIns, index + op.Im, op.Size);
+
+ if (overlap)
+ {
+ resD = resM;
+ }
+ }
+
+ context.Copy(GetVecA32(op.Qd), resD);
+ if (!overlap)
+ {
+ context.Copy(GetVecA32(op.Qm), resM);
+ }
+ }
+ }
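+
+ // For reference: VUZP de-interleaves: resD collects the even-indexed lanes and
+ // resM the odd-indexed lanes of the concatenation d:m.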
+
+ private static void EmitVectorZipUzpOpSimd32(ArmEmitterContext context, Intrinsic inst1, Intrinsic inst2)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ bool overlap = op.Qm == op.Qd;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand dPart = d;
+ Operand mPart = m;
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ dPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, d, op.Vd, 0);
+ mPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, m, op.Vm, 0);
+ }
+
+ Intrinsic vSize = op.Q ? Intrinsic.Arm64V128 : Intrinsic.Arm64V64;
+
+ vSize |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ Operand resD = context.AddIntrinsic(inst1 | vSize, dPart, mPart);
+ Operand resM = context.AddIntrinsic(inst2 | vSize, dPart, mPart);
+
+ if (!op.Q) // Register insert.
+ {
+ resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, d, Const(op.Vd & 1), resD, Const(0));
+
+ if (overlap)
+ {
+ resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, resD, Const(op.Vm & 1), resM, Const(0));
+ }
+ else
+ {
+ resM = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, m, Const(op.Vm & 1), resM, Const(0));
+ }
+ }
+
+ context.Copy(d, resD);
+ if (!overlap)
+ {
+ context.Copy(m, resM);
+ }
+ }
+
+ private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+ Operand initialM = m;
+ Operand initialD = d;
+
+ if (!op.Q) // Register swap: move relevant doubleword to side 0, for consistency.
+ {
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, 0);
+ d = EmitMoveDoubleWordToSide(context, d, op.Vd, 0);
+ }
+
+ (Operand resM, Operand resD) = shuffleFunc(m, d);
+
+ bool overlap = op.Qm == op.Qd;
+
+ if (!op.Q) // Register insert.
+ {
+ resM = EmitDoubleWordInsert(context, initialM, EmitMoveDoubleWordToSide(context, resM, 0, op.Vm), op.Vm);
+ resD = EmitDoubleWordInsert(context, overlap ? resM : initialD, EmitMoveDoubleWordToSide(context, resD, 0, op.Vd), op.Vd);
+ }
+
+ if (!overlap)
+ {
+ context.Copy(initialM, resM);
+ }
+
+ context.Copy(initialD, resD);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdShift.cs b/src/ARMeilleure/Instructions/InstEmitSimdShift.cs
new file mode 100644
index 00000000..19e41119
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdShift.cs
@@ -0,0 +1,1827 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
+#region "Masks"
+ private static readonly long[] _masks_SliSri = new long[] // Replication masks.
+ {
+ 0x0101010101010101L, 0x0001000100010001L, 0x0000000100000001L, 0x0000000000000001L
+ };
+#endregion
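+
+ // The masks above are multipliers: multiplying an eSize-wide mask by the matching
+ // replication constant broadcasts it to every element of a 64-bit lane, e.g. for
+ // bytes: 0x0F * 0x0101010101010101L == 0x0F0F0F0F0F0F0F0FL.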
+
+ public static void Rshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64RshrnV, shift);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ long roundConst = 1L << (shift - 1);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand dLow = context.VectorZeroUpper64(d);
+
+ Operand mask = default;
+
+ switch (op.Size + 1)
+ {
+ case 1: mask = X86GetAllElements(context, (int)roundConst * 0x00010001); break;
+ case 2: mask = X86GetAllElements(context, (int)roundConst); break;
+ case 3: mask = X86GetAllElements(context, roundConst); break;
+ }
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ Operand res = context.AddIntrinsic(addInst, n, mask);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(srlInst, res, Const(shift));
+
+ Operand mask2 = X86GetAllElements(context, EvenMasks[op.Size]);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask2);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+
+ res = context.AddIntrinsic(movInst, dLow, res);
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmNarrowOpZx(context, round: true);
+ }
+ }
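+
+ // Scalar sketch of the rounded narrowing shift above, for 16 -> 8 bit lanes
+ // (shift in [1, 8]; the widened add cannot overflow a uint). Not used by the emitter.
+ private static byte RshrnByteSketch(ushort x, int shift)
+ {
+     return (byte)(((uint)x + (1u << (shift - 1))) >> shift);
+ }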
+
+ public static void Shl_S(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64ShlS, shift);
+ }
+ else
+ {
+ EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+ }
+ }
+
+ public static void Shl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+ int eSize = 8 << op.Size;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64ShlV, shift);
+ }
+ else if (shift >= eSize)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.UseGfni && op.Size == 0)
+ {
+ Operand n = GetVec(op.Rn);
+
+ ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift);
+
+ Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(shift));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+ }
+ }
+
+ public static void Shll_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int shift = 8 << op.Size;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ShllV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ }
+
+ Intrinsic movsxInst = X86PmovsxInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(movsxInst, n);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(sllInst, res, Const(shift));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+ }
+ }
+
+ public static void Shrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64ShrnV, shift);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand dLow = context.VectorZeroUpper64(d);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
+
+ Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Operand mask = X86GetAllElements(context, EvenMasks[op.Size]);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+
+ res = context.AddIntrinsic(movInst, dLow, res);
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmNarrowOpZx(context, round: false);
+ }
+ }
+
+ public static void Sli_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SliS, shift);
+ }
+ else
+ {
+ EmitSli(context, scalar: true);
+ }
+ }
+
+ public static void Sli_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SliV, shift);
+ }
+ else
+ {
+ EmitSli(context, scalar: false);
+ }
+ }
+
+ public static void Sqrshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
+ }
+ }
+
+ public static void Sqrshrn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnS, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+ }
+ }
+
+ public static void Sqrshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnV, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+ }
+
+ public static void Sqrshrun_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunS, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+ }
+ }
+
+ public static void Sqrshrun_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunV, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+ }
+
+ public static void Sqshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
+ }
+ }
+
+ public static void Sqshrn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnS, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+ }
+ }
+
+ public static void Sqshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnV, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+ }
+
+ public static void Sqshrun_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunS, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+ }
+ }
+
+ public static void Sqshrun_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunV, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+ }
+
+ public static void Sri_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SriS, shift);
+ }
+ else
+ {
+ EmitSri(context, scalar: true);
+ }
+ }
+
+ public static void Sri_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SriV, shift);
+ }
+ else
+ {
+ EmitSri(context, scalar: false);
+ }
+ }
+
+ public static void Srshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
+ }
+ }
+
+ public static void Srshr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SrshrS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+ }
+ }
+
+ public static void Srshr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SrshrV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand n = GetVec(op.Rn);
+
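+ // Rounding trick: (n << (eSize - shift)) >> (eSize - 1) isolates bit (shift - 1)
+ // of each lane - the last bit shifted out - which is exactly the round-to-nearest
+ // increment to add to the shifted result. Doing it this way avoids overflowing
+ // the lane with an explicit addition of the rounding constant.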
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSra);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(context, ShrImmFlags.Round);
+ }
+ }
+
+ public static void Srsra_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SrsraS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Srsra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SrsraV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSra);
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Sshl_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SshlS);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
+ }
+ }
+
+ public static void Sshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Signed);
+ }
+ }
+
+ public static void Sshll_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshllV, shift);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ }
+
+ Intrinsic movsxInst = X86PmovsxInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(movsxInst, n);
+
+ if (shift != 0)
+ {
+ Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(sllInst, res, Const(shift));
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinarySx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+ }
+ }
+
+ public static void Sshr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SshrS, shift);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+ }
+ }
+
+ public static void Sshr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshrV, shift);
+ }
+ else if (Optimizations.UseGfni && op.Size == 0)
+ {
+ Operand n = GetVec(op.Rn);
+
+ ulong bitMatrix;
+
+ if (shift < 8)
+ {
+ bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift);
+
+ // Extend sign-bit
+ bitMatrix |= 0x8080808080808080UL >> (64 - shift * 8);
+ }
+ else
+ {
+ // Replicate sign-bit into all bits
+ bitMatrix = 0x8080808080808080UL;
+ }
+
+ Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorSx);
+ }
+ }
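+
+ // Per-byte reference for the GFNI path above: an arithmetic shift right of each
+ // byte lane, with shift == 8 replicating the sign bit (a sketch, not used here).
+ private static byte SshrByteSketch(byte x, int shift)
+ {
+     return (byte)((sbyte)x >> Math.Min(shift, 7));
+ }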
+
+ public static void Ssra_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SsraS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Ssra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SsraV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Uqrshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqrshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
+ }
+ }
+
+ public static void Uqrshrn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnS, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+ }
+ }
+
+ public static void Uqrshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnV, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+ }
+ }
+
+ public static void Uqshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Saturating);
+ }
+ }
+
+ public static void Uqshrn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnS, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+ }
+ }
+
+ public static void Uqshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnV, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+ }
+ }
+
+ public static void Urshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Round);
+ }
+ }
+
+ public static void Urshr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UrshrS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+ }
+ }
+
+ public static void Urshr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UrshrV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSrl);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(context, ShrImmFlags.Round);
+ }
+ }
+
+ public static void Ursra_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UrsraS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Ursra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UrsraV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSrl);
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Ushl_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64UshlS);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Scalar);
+ }
+ }
+
+ public static void Ushl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.None);
+ }
+ }
+
+ public static void Ushll_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshllV, shift);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ }
+
+ Intrinsic movzxInst = X86PmovzxInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(movzxInst, n);
+
+ if (shift != 0)
+ {
+ Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(sllInst, res, Const(shift));
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+ }
+ }
+
+ public static void Ushr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UshrS, shift);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+ }
+ }
+
+ public static void Ushr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshrV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorZx);
+ }
+ }
+
+ public static void Usra_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UsraS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Usra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UsraV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ [Flags]
+ private enum ShrImmFlags
+ {
+ Scalar = 1 << 0,
+ Signed = 1 << 1,
+
+ Round = 1 << 2,
+ Accumulate = 1 << 3,
+
+ ScalarSx = Scalar | Signed,
+ ScalarZx = Scalar,
+
+ VectorSx = Signed,
+ VectorZx = 0
+ }
+
+ private static void EmitScalarShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
+ }
+
+ private static void EmitScalarShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
+ }
+
+ private static void EmitVectorShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
+ }
+
+ private static void EmitVectorShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
+ }
+
+ private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ bool scalar = (flags & ShrImmFlags.Scalar) != 0;
+ bool signed = (flags & ShrImmFlags.Signed) != 0;
+ bool round = (flags & ShrImmFlags.Round) != 0;
+ bool accumulate = (flags & ShrImmFlags.Accumulate) != 0;
+
+ int shift = GetImmShr(op);
+
+ long roundConst = 1L << (shift - 1);
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand e = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+
+ if (op.Size <= 2)
+ {
+ if (round)
+ {
+ e = context.Add(e, Const(roundConst));
+ }
+
+ e = signed ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
+ }
+ else /* if (op.Size == 3) */
+ {
+ e = EmitShrImm64(context, e, signed, round ? roundConst : 0L, shift);
+ }
+
+ if (accumulate)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size, signed);
+
+ e = context.Add(e, de);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ long roundConst = 1L << (shift - 1);
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand e = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+
+ if (round)
+ {
+ e = context.Add(e, Const(roundConst));
+ }
+
+ e = context.ShiftRightUI(e, Const(shift));
+
+ res = EmitVectorInsert(context, res, e, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+
+ [Flags]
+ private enum ShrImmSaturatingNarrowFlags
+ {
+ Scalar = 1 << 0,
+ SignedSrc = 1 << 1,
+ SignedDst = 1 << 2,
+
+ Round = 1 << 3,
+
+ ScalarSxSx = Scalar | SignedSrc | SignedDst,
+ ScalarSxZx = Scalar | SignedSrc,
+ ScalarZxZx = Scalar,
+
+ VectorSxSx = SignedSrc | SignedDst,
+ VectorSxZx = SignedSrc,
+ VectorZxZx = 0
+ }
+
+ private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
+ }
+
+ private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
+ bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
+ bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
+ bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;
+
+ int shift = GetImmShr(op);
+
+ long roundConst = 1L << (shift - 1);
+
+ int elems = !scalar ? 8 >> op.Size : 1;
+
+ int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand e = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);
+
+ if (op.Size <= 1 || !round)
+ {
+ if (round)
+ {
+ e = context.Add(e, Const(roundConst));
+ }
+
+ e = signedSrc ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
+ }
+ else /* if (op.Size == 2 && round) */
+ {
+ e = EmitShrImm64(context, e, signedSrc, roundConst, shift); // shift <= 32
+ }
+
+ e = signedSrc ? EmitSignedSrcSatQ(context, e, op.Size, signedDst) : EmitUnsignedSrcSatQ(context, e, op.Size, signedDst);
+
+ res = EmitVectorInsert(context, res, e, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
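+
+ // Scalar sketch of the saturating-narrow clamp for a signed 16-bit source narrowed
+ // to a signed byte (size 0). EmitSignedSrcSatQ (defined elsewhere) is assumed to
+ // behave like this, plus setting the QC saturation flag, which this sketch omits.
+ private static sbyte SatNarrowSbyteSketch(long e)
+ {
+     return (sbyte)Math.Clamp(e, sbyte.MinValue, sbyte.MaxValue);
+ }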
+
+ // dst64 = (Int(src64, signed) + roundConst) >> shift;
+ private static Operand EmitShrImm64(
+ ArmEmitterContext context,
+ Operand value,
+ bool signed,
+ long roundConst,
+ int shift)
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShrImm64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShrImm64));
+
+ return context.Call(info, value, Const(roundConst), Const(shift));
+ }
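+
+ // A simplified sketch of the signed soft-fallback helper called above; the real
+ // SoftFallback.SignedShrImm64 (not part of this diff) must additionally handle
+ // `value + roundConst` overflowing 64 bits, which this naive version ignores.
+ private static long SignedShrImm64Sketch(long value, long roundConst, int shift)
+ {
+     return (value + roundConst) >> shift;
+ }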
+
+ private static void EmitVectorShImmWidenBinarySx(ArmEmitterContext context, Func2I emit, int imm)
+ {
+ EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true);
+ }
+
+ private static void EmitVectorShImmWidenBinaryZx(ArmEmitterContext context, Func2I emit, int imm)
+ {
+ EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false);
+ }
+
+ private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, Const(imm)), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitSli(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+ int eSize = 8 << op.Size;
+
+ ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL;
+
+ if (shift >= eSize)
+ {
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.UseGfni && op.Size == 0)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift);
+
+ Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+ Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+ Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+ Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand nShifted = context.AddIntrinsic(sllInst, n, Const(shift));
+
+ Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+ Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand neShifted = context.ShiftLeft(ne, Const(shift));
+
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+
+ Operand deMasked = context.BitwiseAnd(de, Const(mask));
+
+ Operand e = context.BitwiseOr(neShifted, deMasked);
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
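+
+ // Scalar reference for SLI on one 64-bit lane (shift in [0, 63]): the shifted
+ // source replaces all but the low `shift` bits of the destination (sketch only).
+ private static ulong SliLaneSketch(ulong d, ulong n, int shift)
+ {
+     ulong keepMask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL;
+
+     return (n << shift) | (d & keepMask);
+ }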
+
+ private static void EmitSri(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ ulong mask = (ulong.MaxValue << (eSize - shift)) & (ulong.MaxValue >> (64 - eSize));
+
+ if (shift >= eSize)
+ {
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.UseGfni && op.Size == 0)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift);
+
+ Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+ Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+ Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+ Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+ Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand neShifted = shift != 64 ? context.ShiftRightUI(ne, Const(shift)) : Const(0UL);
+
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+
+ Operand deMasked = context.BitwiseAnd(de, Const(mask));
+
+ Operand e = context.BitwiseOr(neShifted, deMasked);
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
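+
+ // Scalar reference for SRI on one 64-bit lane (shift in [1, 63]): the shifted
+ // source replaces all but the top `shift` bits of the destination (sketch only).
+ private static ulong SriLaneSketch(ulong d, ulong n, int shift)
+ {
+     ulong keepMask = ulong.MaxValue << (64 - shift);
+
+     return (n >> shift) | (d & keepMask);
+ }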
+
+ [Flags]
+ private enum ShlRegFlags
+ {
+ None = 0,
+ Scalar = 1 << 0,
+ Signed = 1 << 1,
+ Round = 1 << 2,
+ Saturating = 1 << 3
+ }
+
+ private static void EmitShlRegOp(ArmEmitterContext context, ShlRegFlags flags = ShlRegFlags.None)
+ {
+ bool scalar = flags.HasFlag(ShlRegFlags.Scalar);
+ bool signed = flags.HasFlag(ShlRegFlags.Signed);
+ bool round = flags.HasFlag(ShlRegFlags.Round);
+ bool saturating = flags.HasFlag(ShlRegFlags.Saturating);
+
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, size: 0);
+
+ Operand e = !saturating
+ ? EmitShlReg(context, ne, context.ConvertI64ToI32(me), round, op.Size, signed)
+ : EmitShlRegSatQ(context, ne, context.ConvertI64ToI32(me), round, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // long SignedShlReg(long op, int shiftLsB, bool round, int size);
+ // ulong UnsignedShlReg(ulong op, int shiftLsB, bool round, int size);
+ private static Operand EmitShlReg(ArmEmitterContext context, Operand op, Operand shiftLsB, bool round, int size, bool signed)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(shiftLsB.Type == OperandType.I32);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lbl1 = Label();
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zero = Const(0);
+ Operand zeroL = Const(0L);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lbl1, shiftLsB, zero, Comparison.GreaterOrEqual);
+ context.Copy(res, signed
+ ? EmitSignedShrReg(context, op, context.Negate(shiftLsB), round, eSize)
+ : EmitUnsignedShrReg(context, op, context.Negate(shiftLsB), round, eSize));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, shiftLsB, zero, Comparison.LessOrEqual);
+ Operand shl = context.ShiftLeft(op, shiftLsB);
+ Operand isGreaterOrEqual = context.ICompareGreaterOrEqual(shiftLsB, eSizeOp);
+ context.Copy(res, context.ConditionalSelect(isGreaterOrEqual, zeroL, shl));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
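+
+ // Scalar sketch of the non-saturating, non-rounding register shift above, assuming
+ // a sign-extended 64-bit lane: the shift amount is the signed low byte of the
+ // corresponding Rm element, a negative amount shifts right, and magnitudes >= eSize
+ // drain the value (to zero, or to the sign fill for signed right shifts).
+ private static long ShlRegSketch(long op, sbyte shiftLsB)
+ {
+     if (shiftLsB >= 0)
+     {
+         return shiftLsB >= 64 ? 0L : op << shiftLsB;
+     }
+
+     int shr = -shiftLsB;
+
+     return shr >= 64 ? (op < 0 ? -1L : 0L) : op >> shr;
+ }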
+
+ // long SignedShlRegSatQ(long op, int shiftLsB, bool round, int size);
+ // ulong UnsignedShlRegSatQ(ulong op, int shiftLsB, bool round, int size);
+ private static Operand EmitShlRegSatQ(ArmEmitterContext context, Operand op, Operand shiftLsB, bool round, int size, bool signed)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(shiftLsB.Type == OperandType.I32);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lbl1 = Label();
+ Operand lbl2 = Label();
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zero = Const(0);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lbl1, shiftLsB, zero, Comparison.GreaterOrEqual);
+ context.Copy(res, signed
+ ? EmitSignedShrReg(context, op, context.Negate(shiftLsB), round, eSize)
+ : EmitUnsignedShrReg(context, op, context.Negate(shiftLsB), round, eSize));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, shiftLsB, zero, Comparison.LessOrEqual);
+ context.BranchIf(lbl2, shiftLsB, eSizeOp, Comparison.Less);
+ context.Copy(res, signed
+ ? EmitSignedSignSatQ(context, op, size)
+ : EmitUnsignedSignSatQ(context, op, size));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl2);
+ Operand shl = context.ShiftLeft(op, shiftLsB);
+ if (eSize == 64)
+ {
+ Operand sarOrShr = signed
+ ? context.ShiftRightSI(shl, shiftLsB)
+ : context.ShiftRightUI(shl, shiftLsB);
+ context.Copy(res, shl);
+ context.BranchIf(lblEnd, sarOrShr, op, Comparison.Equal);
+ context.Copy(res, signed
+ ? EmitSignedSignSatQ(context, op, size)
+ : EmitUnsignedSignSatQ(context, op, size));
+ }
+ else
+ {
+ context.Copy(res, signed
+ ? EmitSignedSrcSatQ(context, shl, size, signedDst: true)
+ : EmitUnsignedSrcSatQ(context, shl, size, signedDst: false));
+ }
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // shift := [1, 128]; eSize := {8, 16, 32, 64}.
+ // long SignedShrReg(long op, int shift, bool round, int eSize);
+ private static Operand EmitSignedShrReg(ArmEmitterContext context, Operand op, Operand shift, bool round, int eSize)
+ {
+ if (round)
+ {
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zeroL = Const(0L);
+ Operand one = Const(1);
+ Operand oneL = Const(1L);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL);
+
+ context.BranchIf(lblEnd, shift, eSizeOp, Comparison.GreaterOrEqual);
+ Operand roundConst = context.ShiftLeft(oneL, context.Subtract(shift, one));
+ Operand add = context.Add(op, roundConst);
+ Operand sar = context.ShiftRightSI(add, shift);
+ if (eSize == 64)
+ {
+ Operand shr = context.ShiftRightUI(add, shift);
+ Operand left = context.BitwiseAnd(context.Negate(op), context.BitwiseExclusiveOr(op, add));
+ Operand isLess = context.ICompareLess(left, zeroL);
+ context.Copy(res, context.ConditionalSelect(isLess, shr, sar));
+ }
+ else
+ {
+ context.Copy(res, sar);
+ }
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+ else
+ {
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zeroL = Const(0L);
+ Operand negOneL = Const(-1L);
+
+ Operand sar = context.ShiftRightSI(op, shift);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sar);
+
+ context.BranchIf(lblEnd, shift, eSizeOp, Comparison.Less);
+ Operand isLess = context.ICompareLess(op, zeroL);
+ context.Copy(res, context.ConditionalSelect(isLess, negOneL, zeroL));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+ }
+
+ // shift := [1, 128]; eSize := {8, 16, 32, 64}.
+ // ulong UnsignedShrReg(ulong op, int shift, bool round, int eSize);
+ private static Operand EmitUnsignedShrReg(ArmEmitterContext context, Operand op, Operand shift, bool round, int eSize)
+ {
+ if (round)
+ {
+ Operand lblEnd = Label();
+
+ Operand zeroUL = Const(0UL);
+ Operand one = Const(1);
+ Operand oneUL = Const(1UL);
+ Operand eSizeMaxOp = Const(64);
+ Operand oneShl63UL = Const(1UL << 63);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL);
+
+ context.BranchIf(lblEnd, shift, eSizeMaxOp, Comparison.Greater);
+ Operand roundConst = context.ShiftLeft(oneUL, context.Subtract(shift, one));
+ Operand add = context.Add(op, roundConst);
+ Operand shr = context.ShiftRightUI(add, shift);
+ Operand isEqual = context.ICompareEqual(shift, eSizeMaxOp);
+ context.Copy(res, context.ConditionalSelect(isEqual, zeroUL, shr));
+ if (eSize == 64)
+ {
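+                    // The rounding add can wrap for 64-bit elements; when it does, shift
+                    // and re-insert the lost carry bit into the result.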
+ context.BranchIf(lblEnd, add, op, Comparison.GreaterOrEqualUI);
+ Operand right = context.BitwiseOr(shr, context.ShiftRightUI(oneShl63UL, context.Subtract(shift, one)));
+ context.Copy(res, context.ConditionalSelect(isEqual, oneUL, right));
+ }
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+ else
+ {
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zeroUL = Const(0UL);
+
+ Operand shr = context.ShiftRightUI(op, shift);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), shr);
+
+ context.BranchIf(lblEnd, shift, eSizeOp, Comparison.Less);
+ context.Copy(res, zeroUL);
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs
new file mode 100644
index 00000000..9ac68088
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs
@@ -0,0 +1,389 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Vqrshrn(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ EmitRoundShrImmSaturatingNarrowOp(context, op.U ? ShrImmSaturatingNarrowFlags.VectorZxZx : ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+
+ public static void Vqrshrun(ArmEmitterContext context)
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+
+ public static void Vqshrn(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ EmitShrImmSaturatingNarrowOp(context, op.U ? ShrImmSaturatingNarrowFlags.VectorZxZx : ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+
+ public static void Vqshrun(ArmEmitterContext context)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+
+ public static void Vrshr(ArmEmitterContext context)
+ {
+ EmitRoundShrImmOp(context, accumulate: false);
+ }
+
+ public static void Vrshrn(ArmEmitterContext context)
+ {
+ EmitRoundShrImmNarrowOp(context, signed: false);
+ }
+
+ public static void Vrsra(ArmEmitterContext context)
+ {
+ EmitRoundShrImmOp(context, accumulate: true);
+ }
+
+ public static void Vshl(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ EmitVectorUnaryOpZx32(context, (op1) => context.ShiftLeft(op1, Const(op.Shift)));
+ }
+
+ public static void Vshl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U)
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, true));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, false));
+ }
+ }
+
+ public static void Vshll(ArmEmitterContext context)
+ {
+ OpCode32SimdShImmLong op = (OpCode32SimdShImmLong)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U);
+
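+                // 32-bit source elements must be widened to 64 bits by hand before the
+                // shift, since the destination elements are twice the source width.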
+ if (op.Size == 2)
+ {
+ if (op.U)
+ {
+ me = context.ZeroExtend32(OperandType.I64, me);
+ }
+ else
+ {
+ me = context.SignExtend32(OperandType.I64, me);
+ }
+ }
+
+ me = context.ShiftLeft(me, Const(op.Shift));
+
+ res = EmitVectorInsert(context, res, me, index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vshr(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+ int maxShift = (8 << op.Size) - 1;
+
+ if (op.U)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => (shift > maxShift) ? Const(op1.Type, 0) : context.ShiftRightUI(op1, Const(shift)));
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => context.ShiftRightSI(op1, Const(Math.Min(maxShift, shift))));
+ }
+ }
+
+ public static void Vshrn(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+
+ EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift)));
+ }
+
+ public static void Vsra(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+ int maxShift = (8 << op.Size) - 1;
+
+ if (op.U)
+ {
+ EmitVectorImmBinaryQdQmOpZx32(context, (op1, op2) =>
+ {
+ Operand shiftRes = shift > maxShift ? Const(op2.Type, 0) : context.ShiftRightUI(op2, Const(shift));
+
+ return context.Add(op1, shiftRes);
+ });
+ }
+ else
+ {
+ EmitVectorImmBinaryQdQmOpSx32(context, (op1, op2) => context.Add(op1, context.ShiftRightSI(op2, Const(Math.Min(maxShift, shift)))));
+ }
+ }
+
+ public static void EmitRoundShrImmOp(ArmEmitterContext context, bool accumulate)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+ long roundConst = 1L << (shift - 1);
+
+ if (op.U)
+ {
+ if (op.Size < 2)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) =>
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ShiftRightUI(op1, Const(shift));
+ }, accumulate);
+ }
+ else if (op.Size == 2)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) =>
+ {
+ op1 = context.ZeroExtend32(OperandType.I64, op1);
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ConvertI64ToI32(context.ShiftRightUI(op1, Const(shift)));
+ }, accumulate);
+ }
+ else /* if (op.Size == 3) */
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: false, roundConst, shift), accumulate);
+ }
+ }
+ else
+ {
+ if (op.Size < 2)
+ {
+ EmitVectorUnaryOpSx32(context, (op1) =>
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ShiftRightSI(op1, Const(shift));
+ }, accumulate);
+ }
+ else if (op.Size == 2)
+ {
+ EmitVectorUnaryOpSx32(context, (op1) =>
+ {
+ op1 = context.SignExtend32(OperandType.I64, op1);
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ConvertI64ToI32(context.ShiftRightSI(op1, Const(shift)));
+ }, accumulate);
+ }
+ else /* if (op.Size == 3) */
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: true, roundConst, shift), accumulate);
+ }
+ }
+ }
+
+ private static void EmitRoundShrImmNarrowOp(ArmEmitterContext context, bool signed)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+ long roundConst = 1L << (shift - 1);
+
+ EmitVectorUnaryNarrowOp32(context, (op1) =>
+ {
+ if (op.Size <= 1)
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+ op1 = signed ? context.ShiftRightSI(op1, Const(shift)) : context.ShiftRightUI(op1, Const(shift));
+ }
+                else /* if (op.Size == 2) */
+ {
+ op1 = EmitShrImm64(context, op1, signed, roundConst, shift); // shift <= 32
+ }
+
+ return op1;
+ }, signed);
+ }
+
+ private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool unsigned)
+ {
+ if (shiftLsB.Type == OperandType.I64)
+ {
+ shiftLsB = context.ConvertI64ToI32(shiftLsB);
+ }
+
+ shiftLsB = context.SignExtend8(OperandType.I32, shiftLsB);
+ Debug.Assert((uint)size < 4u);
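+            // A positive shift byte shifts left and a negative one shifts right; amounts
+            // at or beyond the element size are fixed up below.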
+
+ Operand negShiftLsB = context.Negate(shiftLsB);
+
+ Operand isPositive = context.ICompareGreaterOrEqual(shiftLsB, Const(0));
+
+ Operand shl = context.ShiftLeft(op, shiftLsB);
+ Operand shr = unsigned ? context.ShiftRightUI(op, negShiftLsB) : context.ShiftRightSI(op, negShiftLsB);
+
+ Operand res = context.ConditionalSelect(isPositive, shl, shr);
+
+ if (unsigned)
+ {
+ Operand isOutOfRange = context.BitwiseOr(
+ context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)),
+ context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size)));
+
+ return context.ConditionalSelect(isOutOfRange, Const(op.Type, 0), res);
+ }
+ else
+ {
+ Operand isOutOfRange0 = context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size));
+ Operand isOutOfRangeN = context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size));
+
+                // Also zero when the shift is too negative but the value was non-negative.
+ isOutOfRange0 = context.BitwiseOr(isOutOfRange0, context.BitwiseAnd(isOutOfRangeN, context.ICompareGreaterOrEqual(op, Const(op.Type, 0))));
+
+ Operand min = (op.Type == OperandType.I64) ? Const(-1L) : Const(-1);
+
+ return context.ConditionalSelect(isOutOfRange0, Const(op.Type, 0), context.ConditionalSelect(isOutOfRangeN, min, res));
+ }
+ }
+
+ [Flags]
+ private enum ShrImmSaturatingNarrowFlags
+ {
+ Scalar = 1 << 0,
+ SignedSrc = 1 << 1,
+ SignedDst = 1 << 2,
+
+ Round = 1 << 3,
+
+ ScalarSxSx = Scalar | SignedSrc | SignedDst,
+ ScalarSxZx = Scalar | SignedSrc,
+ ScalarZxZx = Scalar,
+
+ VectorSxSx = SignedSrc | SignedDst,
+ VectorSxZx = SignedSrc,
+ VectorZxZx = 0
+ }
+
+ private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
+ }
+
+ private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
+ bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
+ bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
+ bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;
+
+ if (scalar)
+ {
+ // TODO: Support scalar operation.
+ throw new NotImplementedException();
+ }
+
+ int shift = GetImmShr(op);
+ long roundConst = 1L << (shift - 1);
+
+ EmitVectorUnaryNarrowOp32(context, (op1) =>
+ {
+ if (op.Size <= 1 || !round)
+ {
+ if (round)
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+ }
+
+ op1 = signedSrc ? context.ShiftRightSI(op1, Const(shift)) : context.ShiftRightUI(op1, Const(shift));
+ }
+ else /* if (op.Size == 2 && round) */
+ {
+ op1 = EmitShrImm64(context, op1, signedSrc, roundConst, shift); // shift <= 32
+ }
+
+ return EmitSatQ(context, op1, 8 << op.Size, signedSrc, signedDst);
+ }, signedSrc);
+ }
+
+ private static int GetImmShr(OpCode32SimdShImm op)
+ {
+ return (8 << op.Size) - op.Shift; // Shr amount is flipped.
+ }
+
+ // dst64 = (Int(src64, signed) + roundConst) >> shift;
+ private static Operand EmitShrImm64(
+ ArmEmitterContext context,
+ Operand value,
+ bool signed,
+ long roundConst,
+ int shift)
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShrImm64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShrImm64));
+
+ return context.Call(info, value, Const(roundConst), Const(shift));
+ }
+
+ private static Operand EmitSatQ(ArmEmitterContext context, Operand value, int eSize, bool signedSrc, bool signedDst)
+ {
+ Debug.Assert(eSize <= 32);
+
+ long intMin = signedDst ? -(1L << (eSize - 1)) : 0;
+ long intMax = signedDst ? (1L << (eSize - 1)) - 1 : (1L << eSize) - 1;
+
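+            // Clamp to the destination range and set FPSR.QC when either bound was
+            // exceeded; e.g. eSize = 16 with signedDst clamps to [-32768, 32767].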
+ Operand gt = signedSrc
+ ? context.ICompareGreater(value, Const(value.Type, intMax))
+ : context.ICompareGreaterUI(value, Const(value.Type, intMax));
+
+ Operand lt = signedSrc
+ ? context.ICompareLess(value, Const(value.Type, intMin))
+ : context.ICompareLessUI(value, Const(value.Type, intMin));
+
+ value = context.ConditionalSelect(gt, Const(value.Type, intMax), value);
+ value = context.ConditionalSelect(lt, Const(value.Type, intMin), value);
+
+ Operand lblNoSat = Label();
+
+ context.BranchIfFalse(lblNoSat, context.BitwiseOr(gt, lt));
+
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+
+ context.MarkLabel(lblNoSat);
+
+ return value;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSystem.cs b/src/ARMeilleure/Instructions/InstEmitSystem.cs
new file mode 100644
index 00000000..f84829aa
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSystem.cs
@@ -0,0 +1,248 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+        private const int DczSizeLog2 = 4; // Log2 of the DC ZVA block size, in 4-byte words.
+ public const int DczSizeInBytes = 4 << DczSizeLog2;
+
+ public static void Isb(ArmEmitterContext context)
+ {
+ // Execute as no-op.
+ }
+
+ public static void Mrs(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ MethodInfo info;
+
+ switch (GetPackedId(op))
+ {
+ case 0b11_011_0000_0000_001: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCtrEl0)); break;
+ case 0b11_011_0000_0000_111: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetDczidEl0)); break;
+ case 0b11_011_0100_0010_000: EmitGetNzcv(context); return;
+ case 0b11_011_0100_0100_000: EmitGetFpcr(context); return;
+ case 0b11_011_0100_0100_001: EmitGetFpsr(context); return;
+ case 0b11_011_1101_0000_010: EmitGetTpidrEl0(context); return;
+ case 0b11_011_1101_0000_011: EmitGetTpidrroEl0(context); return;
+ case 0b11_011_1110_0000_000: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntfrqEl0)); break;
+ case 0b11_011_1110_0000_001: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0)); break;
+ case 0b11_011_1110_0000_010: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntvctEl0)); break;
+
+ default: throw new NotImplementedException($"Unknown MRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+ }
+
+ SetIntOrZR(context, op.Rt, context.Call(info));
+ }
+
+ public static void Msr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ switch (GetPackedId(op))
+ {
+ case 0b11_011_0100_0010_000: EmitSetNzcv(context); return;
+ case 0b11_011_0100_0100_000: EmitSetFpcr(context); return;
+ case 0b11_011_0100_0100_001: EmitSetFpsr(context); return;
+ case 0b11_011_1101_0000_010: EmitSetTpidrEl0(context); return;
+
+ default: throw new NotImplementedException($"Unknown MSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+ }
+ }
+
+ public static void Nop(ArmEmitterContext context)
+ {
+ // Do nothing.
+ }
+
+ public static void Sys(ArmEmitterContext context)
+ {
+            // This instruction performs CPU operations such as cache invalidation and
+            // address translation.
+            // Most variants are treated as no-ops here since no caches are emulated;
+            // DC ZVA still has to zero the target block.
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ switch (GetPackedId(op))
+ {
+ case 0b11_011_0111_0100_001:
+ {
+ // DC ZVA
+ Operand t = GetIntOrZR(context, op.Rt);
+
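+                        // Zero the whole block (DczSizeInBytes = 64 bytes) using
+                        // eight-byte stores of the zero register.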
+ for (long offset = 0; offset < DczSizeInBytes; offset += 8)
+ {
+ Operand address = context.Add(t, Const(offset));
+
+ InstEmitMemoryHelper.EmitStore(context, address, RegisterConsts.ZeroIndex, 3);
+ }
+
+ break;
+ }
+
+ // No-op
+ case 0b11_011_0111_1110_001: // DC CIVAC
+ break;
+
+ case 0b11_011_0111_0101_001: // IC IVAU
+ Operand target = Register(op.Rt, RegisterType.Integer, OperandType.I64);
+ context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.InvalidateCacheLine)), target);
+ break;
+ }
+ }
+
+ private static int GetPackedId(OpCodeSystem op)
+ {
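+            // Pack Op0:Op1:CRn:CRm:Op2 into one value, matching the 0bOO_ooo_nnnn_mmmm_ppp
+            // literals used by the MRS/MSR/Sys switches; e.g. CNTVCT_EL0 (op0 = 3, op1 = 3,
+            // CRn = 14, CRm = 0, op2 = 2) packs to 0b11_011_1110_0000_010.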
+ int id;
+
+ id = op.Op2 << 0;
+ id |= op.CRm << 3;
+ id |= op.CRn << 7;
+ id |= op.Op1 << 11;
+ id |= op.Op0 << 14;
+
+ return id;
+ }
+
+ private static void EmitGetNzcv(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
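+            // Each PState flag enum value is its NZCV bit position (V = 28, C = 29,
+            // Z = 30, N = 31), so shifting a flag by its own value rebuilds the packed register.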
+ Operand nzcv = context.ShiftLeft(GetFlag(PState.VFlag), Const((int)PState.VFlag));
+ nzcv = context.BitwiseOr(nzcv, context.ShiftLeft(GetFlag(PState.CFlag), Const((int)PState.CFlag)));
+ nzcv = context.BitwiseOr(nzcv, context.ShiftLeft(GetFlag(PState.ZFlag), Const((int)PState.ZFlag)));
+ nzcv = context.BitwiseOr(nzcv, context.ShiftLeft(GetFlag(PState.NFlag), Const((int)PState.NFlag)));
+
+ SetIntOrZR(context, op.Rt, nzcv);
+ }
+
+ private static void EmitGetFpcr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand fpcr = Const(0);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPCR.Mask.HasFlag((FPCR)(1u << flag)))
+ {
+ fpcr = context.BitwiseOr(fpcr, context.ShiftLeft(GetFpFlag((FPState)flag), Const(flag)));
+ }
+ }
+
+ SetIntOrZR(context, op.Rt, fpcr);
+ }
+
+ private static void EmitGetFpsr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ context.SyncQcFlag();
+
+ Operand fpsr = Const(0);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPSR.Mask.HasFlag((FPSR)(1u << flag)))
+ {
+ fpsr = context.BitwiseOr(fpsr, context.ShiftLeft(GetFpFlag((FPState)flag), Const(flag)));
+ }
+ }
+
+ SetIntOrZR(context, op.Rt, fpsr);
+ }
+
+ private static void EmitGetTpidrEl0(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ Operand result = context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())));
+
+ SetIntOrZR(context, op.Rt, result);
+ }
+
+ private static void EmitGetTpidrroEl0(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ Operand result = context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrroEl0Offset())));
+
+ SetIntOrZR(context, op.Rt, result);
+ }
+
+ private static void EmitSetNzcv(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand nzcv = GetIntOrZR(context, op.Rt);
+ nzcv = context.ConvertI64ToI32(nzcv);
+
+ SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.VFlag)), Const(1)));
+ SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.CFlag)), Const(1)));
+ SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.ZFlag)), Const(1)));
+ SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.NFlag)), Const(1)));
+ }
+
+ private static void EmitSetFpcr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand fpcr = GetIntOrZR(context, op.Rt);
+ fpcr = context.ConvertI64ToI32(fpcr);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPCR.Mask.HasFlag((FPCR)(1u << flag)))
+ {
+ SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpcr, Const(flag)), Const(1)));
+ }
+ }
+
+ context.UpdateArmFpMode();
+ }
+
+ private static void EmitSetFpsr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ context.ClearQcFlagIfModified();
+
+ Operand fpsr = GetIntOrZR(context, op.Rt);
+ fpsr = context.ConvertI64ToI32(fpsr);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPSR.Mask.HasFlag((FPSR)(1u << flag)))
+ {
+ SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpsr, Const(flag)), Const(1)));
+ }
+ }
+
+ context.UpdateArmFpMode();
+ }
+
+ private static void EmitSetTpidrEl0(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand value = GetIntOrZR(context, op.Rt);
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ context.Store(context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())), value);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSystem32.cs b/src/ARMeilleure/Instructions/InstEmitSystem32.cs
new file mode 100644
index 00000000..f2732c99
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSystem32.cs
@@ -0,0 +1,351 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Mcr(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ if (op.Coproc != 15 || op.Opc1 != 0)
+ {
+ InstEmit.Und(context);
+
+ return;
+ }
+
+ switch (op.CRn)
+ {
+ case 13: // Process and Thread Info.
+ if (op.CRm != 0)
+ {
+                        throw new NotImplementedException($"Unknown MCR CRm 0x{op.CRm:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ switch (op.Opc2)
+ {
+ case 2:
+ EmitSetTpidrEl0(context); return;
+
+ default:
+                            throw new NotImplementedException($"Unknown MCR Opc2 0x{op.Opc2:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ case 7:
+ switch (op.CRm) // Cache and Memory barrier.
+ {
+ case 10:
+ switch (op.Opc2)
+ {
+ case 5: // Data Memory Barrier Register.
+ return; // No-op.
+
+ default:
+                                            throw new NotImplementedException($"Unknown MCR Opc2 0x{op.Opc2:X} at 0x{op.Address:X16} (0x{op.RawOpCode:X}).");
+ }
+
+ default:
+                            throw new NotImplementedException($"Unknown MCR CRm 0x{op.CRm:X} at 0x{op.Address:X16} (0x{op.RawOpCode:X}).");
+ }
+
+ default:
+                    throw new NotImplementedException($"Unknown MCR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+ }
+ }
+
+ public static void Mrc(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ if (op.Coproc != 15 || op.Opc1 != 0)
+ {
+ InstEmit.Und(context);
+
+ return;
+ }
+
+ Operand result;
+
+ switch (op.CRn)
+ {
+ case 13: // Process and Thread Info.
+ if (op.CRm != 0)
+ {
+ throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ switch (op.Opc2)
+ {
+ case 2:
+ result = EmitGetTpidrEl0(context); break;
+
+ case 3:
+ result = EmitGetTpidrroEl0(context); break;
+
+ default:
+ throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ break;
+
+ default:
+ throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+ }
+
+            if (op.Rt == RegisterAlias.Aarch32Pc)
+            {
+                // Special behavior: copy NZCV flags into APSR.
+                EmitSetNzcv(context, result);
+            }
+            else
+            {
+                SetIntA32(context, op.Rt, result);
+            }
+ }
+
+ public static void Mrrc(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ if (op.Coproc != 15)
+ {
+ InstEmit.Und(context);
+
+ return;
+ }
+
+ int opc = op.MrrcOp;
+
+ MethodInfo info;
+
+ switch (op.CRm)
+ {
+ case 14: // Timer.
+ switch (opc)
+ {
+ case 0:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0)); break;
+
+ default:
+ throw new NotImplementedException($"Unknown MRRC Opc1 0x{opc:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ break;
+
+ default:
+ throw new NotImplementedException($"Unknown MRRC 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+ }
+
+ Operand result = context.Call(info);
+
+ SetIntA32(context, op.Rt, context.ConvertI64ToI32(result));
+ SetIntA32(context, op.CRn, context.ConvertI64ToI32(context.ShiftRightUI(result, Const(32))));
+ }
+
+ public static void Mrs(ArmEmitterContext context)
+ {
+ OpCode32Mrs op = (OpCode32Mrs)context.CurrOp;
+
+ if (op.R)
+ {
+ throw new NotImplementedException("SPSR");
+ }
+ else
+ {
+ Operand spsr = context.ShiftLeft(GetFlag(PState.VFlag), Const((int)PState.VFlag));
+ spsr = context.BitwiseOr(spsr, context.ShiftLeft(GetFlag(PState.CFlag), Const((int)PState.CFlag)));
+ spsr = context.BitwiseOr(spsr, context.ShiftLeft(GetFlag(PState.ZFlag), Const((int)PState.ZFlag)));
+ spsr = context.BitwiseOr(spsr, context.ShiftLeft(GetFlag(PState.NFlag), Const((int)PState.NFlag)));
+ spsr = context.BitwiseOr(spsr, context.ShiftLeft(GetFlag(PState.QFlag), Const((int)PState.QFlag)));
+
+ // TODO: Remaining flags.
+
+ SetIntA32(context, op.Rd, spsr);
+ }
+ }
+
+ public static void Msr(ArmEmitterContext context)
+ {
+ OpCode32MsrReg op = (OpCode32MsrReg)context.CurrOp;
+
+ if (op.R)
+ {
+ throw new NotImplementedException("SPSR");
+ }
+ else
+ {
+ if ((op.Mask & 8) != 0)
+ {
+ Operand value = GetIntA32(context, op.Rn);
+
+ EmitSetNzcv(context, value);
+
+ Operand q = context.BitwiseAnd(context.ShiftRightUI(value, Const((int)PState.QFlag)), Const(1));
+
+ SetFlag(context, PState.QFlag, q);
+ }
+
+ if ((op.Mask & 4) != 0)
+ {
+ throw new NotImplementedException("APSR_g");
+ }
+
+ if ((op.Mask & 2) != 0)
+ {
+ throw new NotImplementedException("CPSR_x");
+ }
+
+ if ((op.Mask & 1) != 0)
+ {
+ throw new NotImplementedException("CPSR_c");
+ }
+ }
+ }
+
+        public static void Nop(ArmEmitterContext context)
+        {
+            // Do nothing.
+        }
+
+ public static void Vmrs(ArmEmitterContext context)
+ {
+ OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp;
+
+ if (op.Rt == RegisterAlias.Aarch32Pc && op.Sreg == 0b0001)
+ {
+ // Special behavior: copy NZCV flags into APSR.
+ SetFlag(context, PState.VFlag, GetFpFlag(FPState.VFlag));
+ SetFlag(context, PState.CFlag, GetFpFlag(FPState.CFlag));
+ SetFlag(context, PState.ZFlag, GetFpFlag(FPState.ZFlag));
+ SetFlag(context, PState.NFlag, GetFpFlag(FPState.NFlag));
+
+ return;
+ }
+
+ switch (op.Sreg)
+ {
+ case 0b0000: // FPSID
+ throw new NotImplementedException("Supervisor Only");
+ case 0b0001: // FPSCR
+ EmitGetFpscr(context); return;
+ case 0b0101: // MVFR2
+ throw new NotImplementedException("MVFR2");
+ case 0b0110: // MVFR1
+ throw new NotImplementedException("MVFR1");
+ case 0b0111: // MVFR0
+ throw new NotImplementedException("MVFR0");
+ case 0b1000: // FPEXC
+ throw new NotImplementedException("Supervisor Only");
+ default:
+ throw new NotImplementedException($"Unknown VMRS 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+ }
+ }
+
+ public static void Vmsr(ArmEmitterContext context)
+ {
+ OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp;
+
+ switch (op.Sreg)
+ {
+ case 0b0000: // FPSID
+ throw new NotImplementedException("Supervisor Only");
+ case 0b0001: // FPSCR
+ EmitSetFpscr(context); return;
+ case 0b0101: // MVFR2
+ throw new NotImplementedException("MVFR2");
+ case 0b0110: // MVFR1
+ throw new NotImplementedException("MVFR1");
+ case 0b0111: // MVFR0
+ throw new NotImplementedException("MVFR0");
+ case 0b1000: // FPEXC
+ throw new NotImplementedException("Supervisor Only");
+ default:
+ throw new NotImplementedException($"Unknown VMSR 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+ }
+ }
+
+ private static void EmitSetNzcv(ArmEmitterContext context, Operand t)
+ {
+ Operand v = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.VFlag)), Const(1));
+ Operand c = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.CFlag)), Const(1));
+ Operand z = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.ZFlag)), Const(1));
+ Operand n = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.NFlag)), Const(1));
+
+ SetFlag(context, PState.VFlag, v);
+ SetFlag(context, PState.CFlag, c);
+ SetFlag(context, PState.ZFlag, z);
+ SetFlag(context, PState.NFlag, n);
+ }
+
+ private static void EmitGetFpscr(ArmEmitterContext context)
+ {
+ OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp;
+
+ Operand fpscr = Const(0);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPSCR.Mask.HasFlag((FPSCR)(1u << flag)))
+ {
+ fpscr = context.BitwiseOr(fpscr, context.ShiftLeft(GetFpFlag((FPState)flag), Const(flag)));
+ }
+ }
+
+ SetIntA32(context, op.Rt, fpscr);
+ }
+
+ private static void EmitSetFpscr(ArmEmitterContext context)
+ {
+ OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp;
+
+ Operand fpscr = GetIntA32(context, op.Rt);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPSCR.Mask.HasFlag((FPSCR)(1u << flag)))
+ {
+ SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpscr, Const(flag)), Const(1)));
+ }
+ }
+
+ context.UpdateArmFpMode();
+ }
+
+ private static Operand EmitGetTpidrEl0(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ return context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())));
+ }
+
+ private static Operand EmitGetTpidrroEl0(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ return context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrroEl0Offset())));
+ }
+
+ private static void EmitSetTpidrEl0(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ Operand value = GetIntA32(context, op.Rt);
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ context.Store(context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())), context.ZeroExtend32(OperandType.I64, value));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstName.cs b/src/ARMeilleure/Instructions/InstName.cs
new file mode 100644
index 00000000..fd71d92e
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstName.cs
@@ -0,0 +1,685 @@
+namespace ARMeilleure.Instructions
+{
+ enum InstName
+ {
+ // Base (AArch64)
+ Adc,
+ Adcs,
+ Add,
+ Adds,
+ Adr,
+ Adrp,
+ And,
+ Ands,
+ Asrv,
+ B,
+ B_Cond,
+ Bfm,
+ Bic,
+ Bics,
+ Bl,
+ Blr,
+ Br,
+ Brk,
+ Cbnz,
+ Cbz,
+ Ccmn,
+ Ccmp,
+ Clrex,
+ Cls,
+ Clz,
+ Crc32b,
+ Crc32h,
+ Crc32w,
+ Crc32x,
+ Crc32cb,
+ Crc32ch,
+ Crc32cw,
+ Crc32cx,
+ Csdb,
+ Csel,
+ Csinc,
+ Csinv,
+ Csneg,
+ Dmb,
+ Dsb,
+ Eon,
+ Eor,
+ Esb,
+ Extr,
+ Hint,
+ Isb,
+ It,
+ Ldar,
+ Ldaxp,
+ Ldaxr,
+ Ldp,
+ Ldr,
+ Ldr_Literal,
+ Ldrs,
+ Ldxr,
+ Ldxp,
+ Lslv,
+ Lsrv,
+ Madd,
+ Movk,
+ Movn,
+ Movz,
+ Mrs,
+ Msr,
+ Msub,
+ Nop,
+ Orn,
+ Orr,
+ Prfm,
+ Rbit,
+ Ret,
+ Rev16,
+ Rev32,
+ Rev64,
+ Rorv,
+ Sbc,
+ Sbcs,
+ Sbfm,
+ Sdiv,
+ Sel,
+ Sev,
+ Sevl,
+ Shsub8,
+ Smaddl,
+ Smsubl,
+ Smulh,
+ Smull,
+ Smulw_,
+ Ssat,
+ Ssat16,
+ Stlr,
+ Stlxp,
+ Stlxr,
+ Stp,
+ Str,
+ Stxp,
+ Stxr,
+ Sub,
+ Subs,
+ Svc,
+ Sxtb,
+ Sxth,
+ Sys,
+ Tbnz,
+ Tbz,
+ Tsb,
+ Ubfm,
+ Udiv,
+ Umaddl,
+ Umsubl,
+ Umulh,
+ Und,
+ Wfe,
+ Wfi,
+ Yield,
+
+ // FP & SIMD (AArch64)
+ Abs_S,
+ Abs_V,
+ Add_S,
+ Add_V,
+ Addhn_V,
+ Addp_S,
+ Addp_V,
+ Addv_V,
+ Aesd_V,
+ Aese_V,
+ Aesimc_V,
+ Aesmc_V,
+ And_V,
+ Bic_V,
+ Bic_Vi,
+ Bif_V,
+ Bit_V,
+ Bsl_V,
+ Cls_V,
+ Clz_V,
+ Cmeq_S,
+ Cmeq_V,
+ Cmge_S,
+ Cmge_V,
+ Cmgt_S,
+ Cmgt_V,
+ Cmhi_S,
+ Cmhi_V,
+ Cmhs_S,
+ Cmhs_V,
+ Cmle_S,
+ Cmle_V,
+ Cmlt_S,
+ Cmlt_V,
+ Cmtst_S,
+ Cmtst_V,
+ Cnt_V,
+ Dup_Gp,
+ Dup_S,
+ Dup_V,
+ Eor_V,
+ Ext_V,
+ Fabd_S,
+ Fabd_V,
+ Fabs_S,
+ Fabs_V,
+ Facge_S,
+ Facge_V,
+ Facgt_S,
+ Facgt_V,
+ Fadd_S,
+ Fadd_V,
+ Faddp_S,
+ Faddp_V,
+ Fccmp_S,
+ Fccmpe_S,
+ Fcmeq_S,
+ Fcmeq_V,
+ Fcmge_S,
+ Fcmge_V,
+ Fcmgt_S,
+ Fcmgt_V,
+ Fcmle_S,
+ Fcmle_V,
+ Fcmlt_S,
+ Fcmlt_V,
+ Fcmp_S,
+ Fcmpe_S,
+ Fcsel_S,
+ Fcvt_S,
+ Fcvtas_Gp,
+ Fcvtas_S,
+ Fcvtas_V,
+ Fcvtau_Gp,
+ Fcvtau_S,
+ Fcvtau_V,
+ Fcvtl_V,
+ Fcvtms_Gp,
+ Fcvtms_V,
+ Fcvtmu_Gp,
+ Fcvtn_V,
+ Fcvtns_Gp,
+ Fcvtns_S,
+ Fcvtns_V,
+ Fcvtnu_S,
+ Fcvtnu_V,
+ Fcvtps_Gp,
+ Fcvtpu_Gp,
+ Fcvtzs_Gp,
+ Fcvtzs_Gp_Fixed,
+ Fcvtzs_S,
+ Fcvtzs_V,
+ Fcvtzs_V_Fixed,
+ Fcvtzu_Gp,
+ Fcvtzu_Gp_Fixed,
+ Fcvtzu_S,
+ Fcvtzu_V,
+ Fcvtzu_V_Fixed,
+ Fdiv_S,
+ Fdiv_V,
+ Fmadd_S,
+ Fmax_S,
+ Fmax_V,
+ Fmaxnm_S,
+ Fmaxnm_V,
+ Fmaxnmp_S,
+ Fmaxnmp_V,
+ Fmaxnmv_V,
+ Fmaxp_V,
+ Fmaxv_V,
+ Fmin_S,
+ Fmin_V,
+ Fminnm_S,
+ Fminnm_V,
+ Fminnmp_S,
+ Fminnmp_V,
+ Fminnmv_V,
+ Fminp_V,
+ Fminv_V,
+ Fmla_Se,
+ Fmla_V,
+ Fmla_Ve,
+ Fmls_Se,
+ Fmls_V,
+ Fmls_Ve,
+ Fmov_S,
+ Fmov_Si,
+ Fmov_Vi,
+ Fmov_Ftoi,
+ Fmov_Itof,
+ Fmov_Ftoi1,
+ Fmov_Itof1,
+ Fmsub_S,
+ Fmul_S,
+ Fmul_Se,
+ Fmul_V,
+ Fmul_Ve,
+ Fmulx_S,
+ Fmulx_Se,
+ Fmulx_V,
+ Fmulx_Ve,
+ Fneg_S,
+ Fneg_V,
+ Fnmadd_S,
+ Fnmsub_S,
+ Fnmul_S,
+ Frecpe_S,
+ Frecpe_V,
+ Frecps_S,
+ Frecps_V,
+ Frecpx_S,
+ Frinta_S,
+ Frinta_V,
+ Frinti_S,
+ Frinti_V,
+ Frintm_S,
+ Frintm_V,
+ Frintn_S,
+ Frintn_V,
+ Frintp_S,
+ Frintp_V,
+ Frintx_S,
+ Frintx_V,
+ Frintz_S,
+ Frintz_V,
+ Frsqrte_S,
+ Frsqrte_V,
+ Frsqrts_S,
+ Frsqrts_V,
+ Fsqrt_S,
+ Fsqrt_V,
+ Fsub_S,
+ Fsub_V,
+ Ins_Gp,
+ Ins_V,
+ Ld__Vms,
+ Ld__Vss,
+ Mla_V,
+ Mla_Ve,
+ Mls_V,
+ Mls_Ve,
+ Movi_V,
+ Mul_V,
+ Mul_Ve,
+ Mvni_V,
+ Neg_S,
+ Neg_V,
+ Not_V,
+ Orn_V,
+ Orr_V,
+ Orr_Vi,
+ Pmull_V,
+ Raddhn_V,
+ Rbit_V,
+ Rev16_V,
+ Rev32_V,
+ Rev64_V,
+ Rshrn_V,
+ Rsubhn_V,
+ Saba_V,
+ Sabal_V,
+ Sabd_V,
+ Sabdl_V,
+ Sadalp_V,
+ Saddl_V,
+ Saddlp_V,
+ Saddlv_V,
+ Saddw_V,
+ Scvtf_Gp,
+ Scvtf_Gp_Fixed,
+ Scvtf_S,
+ Scvtf_S_Fixed,
+ Scvtf_V,
+ Scvtf_V_Fixed,
+ Sha1c_V,
+ Sha1h_V,
+ Sha1m_V,
+ Sha1p_V,
+ Sha1su0_V,
+ Sha1su1_V,
+ Sha256h_V,
+ Sha256h2_V,
+ Sha256su0_V,
+ Sha256su1_V,
+ Shadd_V,
+ Shl_S,
+ Shl_V,
+ Shll_V,
+ Shrn_V,
+ Shsub_V,
+ Sli_S,
+ Sli_V,
+ Smax_V,
+ Smaxp_V,
+ Smaxv_V,
+ Smin_V,
+ Sminp_V,
+ Sminv_V,
+ Smlal_V,
+ Smlal_Ve,
+ Smlsl_V,
+ Smlsl_Ve,
+ Smov_S,
+ Smull_V,
+ Smull_Ve,
+ Sqabs_S,
+ Sqabs_V,
+ Sqadd_S,
+ Sqadd_V,
+ Sqdmulh_S,
+ Sqdmulh_V,
+ Sqdmulh_Ve,
+ Sqneg_S,
+ Sqneg_V,
+ Sqrdmulh_S,
+ Sqrdmulh_V,
+ Sqrdmulh_Ve,
+ Sqrshl_V,
+ Sqrshrn_S,
+ Sqrshrn_V,
+ Sqrshrun_S,
+ Sqrshrun_V,
+ Sqshl_V,
+ Sqshrn_S,
+ Sqshrn_V,
+ Sqshrun_S,
+ Sqshrun_V,
+ Sqsub_S,
+ Sqsub_V,
+ Sqxtn_S,
+ Sqxtn_V,
+ Sqxtun_S,
+ Sqxtun_V,
+ Srhadd_V,
+ Sri_S,
+ Sri_V,
+ Srshl_V,
+ Srshr_S,
+ Srshr_V,
+ Srsra_S,
+ Srsra_V,
+ Sshl_S,
+ Sshl_V,
+ Sshll_V,
+ Sshr_S,
+ Sshr_V,
+ Ssra_S,
+ Ssra_V,
+ Ssubl_V,
+ Ssubw_V,
+ St__Vms,
+ St__Vss,
+ Sub_S,
+ Sub_V,
+ Subhn_V,
+ Suqadd_S,
+ Suqadd_V,
+ Tbl_V,
+ Tbx_V,
+ Trn1_V,
+ Trn2_V,
+ Uaba_V,
+ Uabal_V,
+ Uabd_V,
+ Uabdl_V,
+ Uadalp_V,
+ Uaddl_V,
+ Uaddlp_V,
+ Uaddlv_V,
+ Uaddw_V,
+ Ucvtf_Gp,
+ Ucvtf_Gp_Fixed,
+ Ucvtf_S,
+ Ucvtf_S_Fixed,
+ Ucvtf_V,
+ Ucvtf_V_Fixed,
+ Uhadd_V,
+ Uhsub_V,
+ Umax_V,
+ Umaxp_V,
+ Umaxv_V,
+ Umin_V,
+ Uminp_V,
+ Uminv_V,
+ Umlal_V,
+ Umlal_Ve,
+ Umlsl_V,
+ Umlsl_Ve,
+ Umov_S,
+ Umull_V,
+ Umull_Ve,
+ Uqadd_S,
+ Uqadd_V,
+ Uqrshl_V,
+ Uqrshrn_S,
+ Uqrshrn_V,
+ Uqshl_V,
+ Uqshrn_S,
+ Uqshrn_V,
+ Uqsub_S,
+ Uqsub_V,
+ Uqxtn_S,
+ Uqxtn_V,
+ Urhadd_V,
+ Urshl_V,
+ Urshr_S,
+ Urshr_V,
+ Ursra_S,
+ Ursra_V,
+ Ushl_S,
+ Ushl_V,
+ Ushll_V,
+ Ushr_S,
+ Ushr_V,
+ Usqadd_S,
+ Usqadd_V,
+ Usra_S,
+ Usra_V,
+ Usubl_V,
+ Usubw_V,
+ Uzp1_V,
+ Uzp2_V,
+ Xtn_V,
+ Zip1_V,
+ Zip2_V,
+
+ // Base (AArch32)
+ Bfc,
+ Bfi,
+ Blx,
+ Bx,
+ Cmp,
+ Cmn,
+ Movt,
+ Mul,
+ Lda,
+ Ldab,
+ Ldaex,
+ Ldaexb,
+ Ldaexd,
+ Ldaexh,
+ Ldah,
+ Ldm,
+ Ldrb,
+ Ldrd,
+ Ldrex,
+ Ldrexb,
+ Ldrexd,
+ Ldrexh,
+ Ldrh,
+ Ldrsb,
+ Ldrsh,
+ Mcr,
+ Mla,
+ Mls,
+ Mov,
+ Mrc,
+ Mrrc,
+ Mvn,
+ Pkh,
+ Pld,
+ Pop,
+ Push,
+ Rev,
+ Revsh,
+ Rsb,
+ Rsc,
+ Sadd8,
+ Sbfx,
+ Shadd8,
+ Smla__,
+ Smlal,
+ Smlal__,
+ Smlaw_,
+ Smmla,
+ Smmls,
+ Smul__,
+ Smmul,
+ Ssub8,
+ Stl,
+ Stlb,
+ Stlex,
+ Stlexb,
+ Stlexd,
+ Stlexh,
+ Stlh,
+ Stm,
+ Strb,
+ Strd,
+ Strex,
+ Strexb,
+ Strexd,
+ Strexh,
+ Strh,
+ Sxtb16,
+ Tbb,
+ Tbh,
+ Teq,
+ Trap,
+ Tst,
+ Uadd8,
+ Ubfx,
+ Uhadd8,
+ Uhsub8,
+ Umaal,
+ Umlal,
+ Umull,
+ Usat,
+ Usat16,
+ Usub8,
+ Uxtb,
+ Uxtb16,
+ Uxth,
+
+ // FP & SIMD (AArch32)
+ Vabd,
+ Vabdl,
+ Vabs,
+ Vadd,
+ Vaddl,
+ Vaddw,
+ Vand,
+ Vbic,
+ Vbif,
+ Vbit,
+ Vbsl,
+ Vceq,
+ Vcge,
+ Vcgt,
+ Vcle,
+ Vclt,
+ Vcmp,
+ Vcmpe,
+ Vcnt,
+ Vcvt,
+ Vdiv,
+ Vdup,
+ Veor,
+ Vext,
+ Vfma,
+ Vfms,
+ Vfnma,
+ Vfnms,
+ Vhadd,
+ Vld1,
+ Vld2,
+ Vld3,
+ Vld4,
+ Vldm,
+ Vldr,
+ Vmax,
+ Vmaxnm,
+ Vmin,
+ Vminnm,
+ Vmla,
+ Vmlal,
+ Vmls,
+ Vmlsl,
+ Vmov,
+ Vmovl,
+ Vmovn,
+ Vmrs,
+ Vmsr,
+ Vmul,
+ Vmull,
+ Vmvn,
+ Vneg,
+ Vnmul,
+ Vnmla,
+ Vnmls,
+ Vorn,
+ Vorr,
+ Vpadd,
+ Vpaddl,
+ Vpmax,
+ Vpmin,
+ Vqadd,
+ Vqdmulh,
+ Vqmovn,
+ Vqmovun,
+ Vqrshrn,
+ Vqrshrun,
+ Vqshrn,
+ Vqshrun,
+ Vqsub,
+ Vrev,
+ Vrhadd,
+ Vrint,
+ Vrinta,
+ Vrintm,
+ Vrintn,
+ Vrintp,
+ Vrintx,
+ Vrshr,
+ Vrshrn,
+ Vsel,
+ Vshl,
+ Vshll,
+ Vshr,
+ Vshrn,
+ Vst1,
+ Vst2,
+ Vst3,
+ Vst4,
+ Vstm,
+ Vstr,
+ Vsqrt,
+ Vrecpe,
+ Vrecps,
+ Vrsqrte,
+ Vrsqrts,
+ Vrsra,
+ Vsra,
+ Vsub,
+ Vsubl,
+ Vsubw,
+ Vtbl,
+ Vtrn,
+ Vtst,
+ Vuzp,
+ Vzip,
+ }
+}
diff --git a/src/ARMeilleure/Instructions/NativeInterface.cs b/src/ARMeilleure/Instructions/NativeInterface.cs
new file mode 100644
index 00000000..2c35387a
--- /dev/null
+++ b/src/ARMeilleure/Instructions/NativeInterface.cs
@@ -0,0 +1,195 @@
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class NativeInterface
+ {
+ private class ThreadContext
+ {
+ public ExecutionContext Context { get; }
+ public IMemoryManager Memory { get; }
+ public Translator Translator { get; }
+
+ public ThreadContext(ExecutionContext context, IMemoryManager memory, Translator translator)
+ {
+ Context = context;
+ Memory = memory;
+ Translator = translator;
+ }
+ }
+
+ [ThreadStatic]
+ private static ThreadContext Context;
+
+ public static void RegisterThread(ExecutionContext context, IMemoryManager memory, Translator translator)
+ {
+ Context = new ThreadContext(context, memory, translator);
+ }
+
+ public static void UnregisterThread()
+ {
+ Context = null;
+ }
+
+ public static void Break(ulong address, int imm)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnBreak(address, imm);
+
+ Statistics.ResumeTimer();
+ }
+
+ public static void SupervisorCall(ulong address, int imm)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnSupervisorCall(address, imm);
+
+ Statistics.ResumeTimer();
+ }
+
+ public static void Undefined(ulong address, int opCode)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnUndefined(address, opCode);
+
+ Statistics.ResumeTimer();
+ }
+
+ #region "System registers"
+ public static ulong GetCtrEl0()
+ {
+ return (ulong)GetContext().CtrEl0;
+ }
+
+ public static ulong GetDczidEl0()
+ {
+ return (ulong)GetContext().DczidEl0;
+ }
+
+ public static ulong GetCntfrqEl0()
+ {
+ return GetContext().CntfrqEl0;
+ }
+
+ public static ulong GetCntpctEl0()
+ {
+ return GetContext().CntpctEl0;
+ }
+
+ public static ulong GetCntvctEl0()
+ {
+ return GetContext().CntvctEl0;
+ }
+ #endregion
+
+ #region "Read"
+ public static byte ReadByte(ulong address)
+ {
+ return GetMemoryManager().ReadTracked<byte>(address);
+ }
+
+ public static ushort ReadUInt16(ulong address)
+ {
+ return GetMemoryManager().ReadTracked<ushort>(address);
+ }
+
+ public static uint ReadUInt32(ulong address)
+ {
+ return GetMemoryManager().ReadTracked<uint>(address);
+ }
+
+ public static ulong ReadUInt64(ulong address)
+ {
+ return GetMemoryManager().ReadTracked<ulong>(address);
+ }
+
+ public static V128 ReadVector128(ulong address)
+ {
+ return GetMemoryManager().ReadTracked<V128>(address);
+ }
+ #endregion
+
+ #region "Write"
+ public static void WriteByte(ulong address, byte value)
+ {
+ GetMemoryManager().Write(address, value);
+ }
+
+ public static void WriteUInt16(ulong address, ushort value)
+ {
+ GetMemoryManager().Write(address, value);
+ }
+
+ public static void WriteUInt32(ulong address, uint value)
+ {
+ GetMemoryManager().Write(address, value);
+ }
+
+ public static void WriteUInt64(ulong address, ulong value)
+ {
+ GetMemoryManager().Write(address, value);
+ }
+
+ public static void WriteVector128(ulong address, V128 value)
+ {
+ GetMemoryManager().Write(address, value);
+ }
+ #endregion
+
+ public static void EnqueueForRejit(ulong address)
+ {
+ Context.Translator.EnqueueForRejit(address, GetContext().ExecutionMode);
+ }
+
+ public static void SignalMemoryTracking(ulong address, ulong size, bool write)
+ {
+ GetMemoryManager().SignalMemoryTracking(address, size, write);
+ }
+
+ public static void ThrowInvalidMemoryAccess(ulong address)
+ {
+ throw new InvalidAccessException(address);
+ }
+
+ public static ulong GetFunctionAddress(ulong address)
+ {
+ TranslatedFunction function = Context.Translator.GetOrTranslate(address, GetContext().ExecutionMode);
+
+ return (ulong)function.FuncPointer.ToInt64();
+ }
+
+ public static void InvalidateCacheLine(ulong address)
+ {
+ Context.Translator.InvalidateJitCacheRegion(address, InstEmit.DczSizeInBytes);
+ }
+
+ public static bool CheckSynchronization()
+ {
+ Statistics.PauseTimer();
+
+ ExecutionContext context = GetContext();
+
+ context.CheckInterrupt();
+
+ Statistics.ResumeTimer();
+
+ return context.Running;
+ }
+
+ public static ExecutionContext GetContext()
+ {
+ return Context.Context;
+ }
+
+ public static IMemoryManager GetMemoryManager()
+ {
+ return Context.Memory;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/Instructions/SoftFallback.cs b/src/ARMeilleure/Instructions/SoftFallback.cs
new file mode 100644
index 00000000..06d76a67
--- /dev/null
+++ b/src/ARMeilleure/Instructions/SoftFallback.cs
@@ -0,0 +1,624 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class SoftFallback
+ {
+        #region "ShrImm64"
+ public static long SignedShrImm64(long value, long roundConst, int shift)
+ {
+ if (roundConst == 0L)
+ {
+ if (shift <= 63)
+ {
+ return value >> shift;
+ }
+ else /* if (shift == 64) */
+ {
+ if (value < 0L)
+ {
+ return -1L;
+ }
+ else /* if (value >= 0L) */
+ {
+ return 0L;
+ }
+ }
+ }
+ else /* if (roundConst == 1L << (shift - 1)) */
+ {
+ if (shift <= 63)
+ {
+ long add = value + roundConst;
+
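+                    // (~value & (value ^ add)) < 0 detects a non-negative value whose rounding
+                    // add overflowed; the logical shift of the wrapped sum is then correct.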
+ if ((~value & (value ^ add)) < 0L)
+ {
+ return (long)((ulong)add >> shift);
+ }
+ else
+ {
+ return add >> shift;
+ }
+ }
+ else /* if (shift == 64) */
+ {
+ return 0L;
+ }
+ }
+ }
+
+ public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift)
+ {
+ if (roundConst == 0L)
+ {
+ if (shift <= 63)
+ {
+ return value >> shift;
+ }
+ else /* if (shift == 64) */
+ {
+ return 0UL;
+ }
+ }
+ else /* if (roundConst == 1L << (shift - 1)) */
+ {
+ ulong add = value + (ulong)roundConst;
+
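+                // If the rounding add wrapped (add < value), the lost carry bit has to be
+                // re-inserted after the shift (or returned directly when shift == 64).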
+ if ((add < value) && (add < (ulong)roundConst))
+ {
+ if (shift <= 63)
+ {
+ return (add >> shift) | (0x8000000000000000UL >> (shift - 1));
+ }
+ else /* if (shift == 64) */
+ {
+ return 1UL;
+ }
+ }
+ else
+ {
+ if (shift <= 63)
+ {
+ return add >> shift;
+ }
+ else /* if (shift == 64) */
+ {
+ return 0UL;
+ }
+ }
+ }
+ }
+        #endregion
+
+        #region "Saturation"
+ public static int SatF32ToS32(float value)
+ {
+ if (float.IsNaN(value)) return 0;
+
+ return value >= int.MaxValue ? int.MaxValue :
+ value <= int.MinValue ? int.MinValue : (int)value;
+ }
+
+ public static long SatF32ToS64(float value)
+ {
+ if (float.IsNaN(value)) return 0;
+
+ return value >= long.MaxValue ? long.MaxValue :
+ value <= long.MinValue ? long.MinValue : (long)value;
+ }
+
+ public static uint SatF32ToU32(float value)
+ {
+ if (float.IsNaN(value)) return 0;
+
+ return value >= uint.MaxValue ? uint.MaxValue :
+ value <= uint.MinValue ? uint.MinValue : (uint)value;
+ }
+
+ public static ulong SatF32ToU64(float value)
+ {
+ if (float.IsNaN(value)) return 0;
+
+ return value >= ulong.MaxValue ? ulong.MaxValue :
+ value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
+ }
+
+ public static int SatF64ToS32(double value)
+ {
+ if (double.IsNaN(value)) return 0;
+
+ return value >= int.MaxValue ? int.MaxValue :
+ value <= int.MinValue ? int.MinValue : (int)value;
+ }
+
+ public static long SatF64ToS64(double value)
+ {
+ if (double.IsNaN(value)) return 0;
+
+ return value >= long.MaxValue ? long.MaxValue :
+ value <= long.MinValue ? long.MinValue : (long)value;
+ }
+
+ public static uint SatF64ToU32(double value)
+ {
+ if (double.IsNaN(value)) return 0;
+
+ return value >= uint.MaxValue ? uint.MaxValue :
+ value <= uint.MinValue ? uint.MinValue : (uint)value;
+ }
+
+ public static ulong SatF64ToU64(double value)
+ {
+ if (double.IsNaN(value)) return 0;
+
+ return value >= ulong.MaxValue ? ulong.MaxValue :
+ value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
+ }
+        #endregion
+
+        #region "Count"
+ public static ulong CountLeadingSigns(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
+ {
+ value ^= value >> 1;
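+            // After the XOR, a bit is set exactly where adjacent bits of the original value
+            // differ, so the highest set bit marks the end of the run of sign-bit copies.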
+
+ int highBit = size - 2;
+
+ for (int bit = highBit; bit >= 0; bit--)
+ {
+ if (((int)(value >> bit) & 0b1) != 0)
+ {
+ return (ulong)(highBit - bit);
+ }
+ }
+
+ return (ulong)(size - 1);
+ }
+
+ private static ReadOnlySpan<byte> ClzNibbleTbl => new byte[] { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+ public static ulong CountLeadingZeros(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
+ {
+ if (value == 0ul)
+ {
+ return (ulong)size;
+ }
+
+ int nibbleIdx = size;
+ int preCount, count = 0;
+
+ do
+ {
+ nibbleIdx -= 4;
+ preCount = ClzNibbleTbl[(int)(value >> nibbleIdx) & 0b1111];
+ count += preCount;
+ }
+ while (preCount == 4);
+
+ return (ulong)count;
+ }
+        #endregion
+
+        #region "Table"
+ public static V128 Tbl1(V128 vector, int bytes, V128 tb0)
+ {
+ return TblOrTbx(default, vector, bytes, tb0);
+ }
+
+ public static V128 Tbl2(V128 vector, int bytes, V128 tb0, V128 tb1)
+ {
+ return TblOrTbx(default, vector, bytes, tb0, tb1);
+ }
+
+ public static V128 Tbl3(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2)
+ {
+ return TblOrTbx(default, vector, bytes, tb0, tb1, tb2);
+ }
+
+ public static V128 Tbl4(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
+ {
+ return TblOrTbx(default, vector, bytes, tb0, tb1, tb2, tb3);
+ }
+
+ public static V128 Tbx1(V128 dest, V128 vector, int bytes, V128 tb0)
+ {
+ return TblOrTbx(dest, vector, bytes, tb0);
+ }
+
+ public static V128 Tbx2(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1)
+ {
+ return TblOrTbx(dest, vector, bytes, tb0, tb1);
+ }
+
+ public static V128 Tbx3(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2)
+ {
+ return TblOrTbx(dest, vector, bytes, tb0, tb1, tb2);
+ }
+
+ public static V128 Tbx4(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
+ {
+ return TblOrTbx(dest, vector, bytes, tb0, tb1, tb2, tb3);
+ }
+
+ private static V128 TblOrTbx(V128 dest, V128 vector, int bytes, params V128[] tb)
+ {
+ byte[] res = new byte[16];
+
+ if (dest != default)
+ {
+ Buffer.BlockCopy(dest.ToArray(), 0, res, 0, bytes);
+ }
+
+ byte[] table = new byte[tb.Length * 16];
+
+ for (byte index = 0; index < tb.Length; index++)
+ {
+ Buffer.BlockCopy(tb[index].ToArray(), 0, table, index * 16, 16);
+ }
+
+ byte[] v = vector.ToArray();
+
+ for (byte index = 0; index < bytes; index++)
+ {
+ byte tblIndex = v[index];
+
+ if (tblIndex < table.Length)
+ {
+ res[index] = table[tblIndex];
+ }
+ }
+
+ return new V128(res);
+ }
+        #endregion
+
+        #region "Crc32"
+ private const uint Crc32RevPoly = 0xedb88320;
+ private const uint Crc32cRevPoly = 0x82f63b78;
+
+ public static uint Crc32b(uint crc, byte value) => Crc32 (crc, Crc32RevPoly, value);
+ public static uint Crc32h(uint crc, ushort value) => Crc32h(crc, Crc32RevPoly, value);
+ public static uint Crc32w(uint crc, uint value) => Crc32w(crc, Crc32RevPoly, value);
+ public static uint Crc32x(uint crc, ulong value) => Crc32x(crc, Crc32RevPoly, value);
+
+ public static uint Crc32cb(uint crc, byte value) => Crc32 (crc, Crc32cRevPoly, value);
+ public static uint Crc32ch(uint crc, ushort value) => Crc32h(crc, Crc32cRevPoly, value);
+ public static uint Crc32cw(uint crc, uint value) => Crc32w(crc, Crc32cRevPoly, value);
+ public static uint Crc32cx(uint crc, ulong value) => Crc32x(crc, Crc32cRevPoly, value);
+
+ private static uint Crc32h(uint crc, uint poly, ushort val)
+ {
+ crc = Crc32(crc, poly, (byte)(val >> 0));
+ crc = Crc32(crc, poly, (byte)(val >> 8));
+
+ return crc;
+ }
+
+ private static uint Crc32w(uint crc, uint poly, uint val)
+ {
+ crc = Crc32(crc, poly, (byte)(val >> 0));
+ crc = Crc32(crc, poly, (byte)(val >> 8));
+ crc = Crc32(crc, poly, (byte)(val >> 16));
+ crc = Crc32(crc, poly, (byte)(val >> 24));
+
+ return crc;
+ }
+
+ private static uint Crc32x(uint crc, uint poly, ulong val)
+ {
+ crc = Crc32(crc, poly, (byte)(val >> 0));
+ crc = Crc32(crc, poly, (byte)(val >> 8));
+ crc = Crc32(crc, poly, (byte)(val >> 16));
+ crc = Crc32(crc, poly, (byte)(val >> 24));
+ crc = Crc32(crc, poly, (byte)(val >> 32));
+ crc = Crc32(crc, poly, (byte)(val >> 40));
+ crc = Crc32(crc, poly, (byte)(val >> 48));
+ crc = Crc32(crc, poly, (byte)(val >> 56));
+
+ return crc;
+ }
+
+ private static uint Crc32(uint crc, uint poly, byte val)
+ {
+ crc ^= val;
+
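+            // Bit-serial reflected CRC step: for each of the 8 bits, XOR in the reversed
+            // polynomial whenever the low bit of the running CRC is set.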
+ for (int bit = 7; bit >= 0; bit--)
+ {
+ uint mask = (uint)(-(int)(crc & 1));
+
+ crc = (crc >> 1) ^ (poly & mask);
+ }
+
+ return crc;
+ }
+        #endregion
+
+        #region "Aes"
+ public static V128 Decrypt(V128 value, V128 roundKey)
+ {
+ return CryptoHelper.AesInvSubBytes(CryptoHelper.AesInvShiftRows(value ^ roundKey));
+ }
+
+ public static V128 Encrypt(V128 value, V128 roundKey)
+ {
+ return CryptoHelper.AesSubBytes(CryptoHelper.AesShiftRows(value ^ roundKey));
+ }
+
+ public static V128 InverseMixColumns(V128 value)
+ {
+ return CryptoHelper.AesInvMixColumns(value);
+ }
+
+ public static V128 MixColumns(V128 value)
+ {
+ return CryptoHelper.AesMixColumns(value);
+ }
+        #endregion
+
+        #region "Sha1"
+ public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+ for (int e = 0; e <= 3; e++)
+ {
+ uint t = ShaChoose(hash_abcd.Extract<uint>(1),
+ hash_abcd.Extract<uint>(2),
+ hash_abcd.Extract<uint>(3));
+
+ hash_e += Rol(hash_abcd.Extract<uint>(0), 5) + t + wk.Extract<uint>(e);
+
+ t = Rol(hash_abcd.Extract<uint>(1), 30);
+
+ hash_abcd.Insert(1, t);
+
+ Rol32_160(ref hash_e, ref hash_abcd);
+ }
+
+ return hash_abcd;
+ }
+
+ public static uint FixedRotate(uint hash_e)
+ {
+ return hash_e.Rol(30);
+ }
+
+ public static V128 HashMajority(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+ for (int e = 0; e <= 3; e++)
+ {
+ uint t = ShaMajority(hash_abcd.Extract<uint>(1),
+ hash_abcd.Extract<uint>(2),
+ hash_abcd.Extract<uint>(3));
+
+ hash_e += Rol(hash_abcd.Extract<uint>(0), 5) + t + wk.Extract<uint>(e);
+
+ t = Rol(hash_abcd.Extract<uint>(1), 30);
+
+ hash_abcd.Insert(1, t);
+
+ Rol32_160(ref hash_e, ref hash_abcd);
+ }
+
+ return hash_abcd;
+ }
+
+ public static V128 HashParity(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+ for (int e = 0; e <= 3; e++)
+ {
+ uint t = ShaParity(hash_abcd.Extract<uint>(1),
+ hash_abcd.Extract<uint>(2),
+ hash_abcd.Extract<uint>(3));
+
+ hash_e += Rol(hash_abcd.Extract<uint>(0), 5) + t + wk.Extract<uint>(e);
+
+ t = Rol(hash_abcd.Extract<uint>(1), 30);
+
+ hash_abcd.Insert(1, t);
+
+ Rol32_160(ref hash_e, ref hash_abcd);
+ }
+
+ return hash_abcd;
+ }
+
+ public static V128 Sha1SchedulePart1(V128 w0_3, V128 w4_7, V128 w8_11)
+ {
+ ulong t2 = w4_7.Extract<ulong>(0);
+ ulong t1 = w0_3.Extract<ulong>(1);
+
+ V128 result = new V128(t1, t2);
+
+ return result ^ (w0_3 ^ w8_11);
+ }
+
+ public static V128 Sha1SchedulePart2(V128 tw0_3, V128 w12_15)
+ {
+ V128 t = tw0_3 ^ (w12_15 >> 32);
+
+ uint tE0 = t.Extract<uint>(0);
+ uint tE1 = t.Extract<uint>(1);
+ uint tE2 = t.Extract<uint>(2);
+ uint tE3 = t.Extract<uint>(3);
+
+ return new V128(tE0.Rol(1), tE1.Rol(1), tE2.Rol(1), tE3.Rol(1) ^ tE0.Rol(2));
+ }
+
+ private static void Rol32_160(ref uint y, ref V128 x)
+ {
+ uint xE3 = x.Extract<uint>(3);
+
+ x <<= 32;
+ x.Insert(0, y);
+
+ y = xE3;
+ }
+
+ private static uint ShaChoose(uint x, uint y, uint z)
+ {
+ return ((y ^ z) & x) ^ z;
+ }
+
+ private static uint ShaMajority(uint x, uint y, uint z)
+ {
+ return (x & y) | ((x | y) & z);
+ }
+
+ private static uint ShaParity(uint x, uint y, uint z)
+ {
+ return x ^ y ^ z;
+ }
+
+ private static uint Rol(this uint value, int count)
+ {
+ return (value << count) | (value >> (32 - count));
+ }
+        #endregion
+
+        #region "Sha256"
+ public static V128 HashLower(V128 hash_abcd, V128 hash_efgh, V128 wk)
+ {
+ return Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
+ }
+
+ public static V128 HashUpper(V128 hash_abcd, V128 hash_efgh, V128 wk)
+ {
+ return Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
+ }
+
+ public static V128 Sha256SchedulePart1(V128 w0_3, V128 w4_7)
+ {
+ V128 result = new V128();
+
+ for (int e = 0; e <= 3; e++)
+ {
+ uint elt = (e <= 2 ? w0_3 : w4_7).Extract<uint>(e <= 2 ? e + 1 : 0);
+
+ elt = elt.Ror(7) ^ elt.Ror(18) ^ elt.Lsr(3);
+
+ elt += w0_3.Extract<uint>(e);
+
+ result.Insert(e, elt);
+ }
+
+ return result;
+ }
+
+ public static V128 Sha256SchedulePart2(V128 w0_3, V128 w8_11, V128 w12_15)
+ {
+ V128 result = new V128();
+
+ ulong t1 = w12_15.Extract<ulong>(1);
+
+ for (int e = 0; e <= 1; e++)
+ {
+ uint elt = t1.ULongPart(e);
+
+ elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10);
+
+ elt += w0_3.Extract<uint>(e) + w8_11.Extract<uint>(e + 1);
+
+ result.Insert(e, elt);
+ }
+
+ t1 = result.Extract<ulong>(0);
+
+ for (int e = 2; e <= 3; e++)
+ {
+ uint elt = t1.ULongPart(e - 2);
+
+ elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10);
+
+ elt += w0_3.Extract<uint>(e) + (e == 2 ? w8_11 : w12_15).Extract<uint>(e == 2 ? 3 : 0);
+
+ result.Insert(e, elt);
+ }
+
+ return result;
+ }
+
+ private static V128 Sha256Hash(V128 x, V128 y, V128 w, bool part1)
+ {
+ for (int e = 0; e <= 3; e++)
+ {
+ uint chs = ShaChoose(y.Extract<uint>(0),
+ y.Extract<uint>(1),
+ y.Extract<uint>(2));
+
+ uint maj = ShaMajority(x.Extract<uint>(0),
+ x.Extract<uint>(1),
+ x.Extract<uint>(2));
+
+ uint t1 = y.Extract<uint>(3) + ShaHashSigma1(y.Extract<uint>(0)) + chs + w.Extract<uint>(e);
+
+ uint t2 = t1 + x.Extract<uint>(3);
+
+ x.Insert(3, t2);
+
+ t2 = t1 + ShaHashSigma0(x.Extract<uint>(0)) + maj;
+
+ y.Insert(3, t2);
+
+ Rol32_256(ref y, ref x);
+ }
+
+ return part1 ? x : y;
+ }
+
+ private static void Rol32_256(ref V128 y, ref V128 x)
+ {
+ uint yE3 = y.Extract<uint>(3);
+ uint xE3 = x.Extract<uint>(3);
+
+ y <<= 32;
+ x <<= 32;
+
+ y.Insert(0, xE3);
+ x.Insert(0, yE3);
+ }
+
+ private static uint ShaHashSigma0(uint x)
+ {
+ return x.Ror(2) ^ x.Ror(13) ^ x.Ror(22);
+ }
+
+ private static uint ShaHashSigma1(uint x)
+ {
+ return x.Ror(6) ^ x.Ror(11) ^ x.Ror(25);
+ }
+
+ private static uint Ror(this uint value, int count)
+ {
+ return (value >> count) | (value << (32 - count));
+ }
+
+ private static uint Lsr(this uint value, int count)
+ {
+ return value >> count;
+ }
+
+ private static uint ULongPart(this ulong value, int part)
+ {
+ return part == 0
+ ? (uint)(value & 0xFFFFFFFFUL)
+ : (uint)(value >> 32);
+ }
+        #endregion
+
+ public static V128 PolynomialMult64_128(ulong op1, ulong op2)
+ {
+ V128 result = V128.Zero;
+
+ V128 op2_128 = new V128(op2, 0);
+
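+            // Carry-less (GF(2)) multiplication: XOR-accumulate a shifted copy of op2
+            // for every set bit of op1.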
+ for (int i = 0; i < 64; i++)
+ {
+ if (((op1 >> i) & 1) == 1)
+ {
+ result ^= op2_128 << i;
+ }
+ }
+
+ return result;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/SoftFloat.cs b/src/ARMeilleure/Instructions/SoftFloat.cs
new file mode 100644
index 00000000..9e3db68d
--- /dev/null
+++ b/src/ARMeilleure/Instructions/SoftFloat.cs
@@ -0,0 +1,3480 @@
+using ARMeilleure.State;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.Instructions
+{
+ static class SoftFloat
+ {
+ static SoftFloat()
+ {
+ RecipEstimateTable = BuildRecipEstimateTable();
+ RecipSqrtEstimateTable = BuildRecipSqrtEstimateTable();
+ }
+
+ public static readonly byte[] RecipEstimateTable;
+ public static readonly byte[] RecipSqrtEstimateTable;
+
+ private static byte[] BuildRecipEstimateTable()
+ {
+ byte[] tbl = new byte[256];
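+            // Each entry maps a normalized 9-bit significand (256..511) to the low 8 bits
+            // of a rounded 9-bit reciprocal estimate, computed by fixed-point division.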
+
+ for (int idx = 0; idx < 256; idx++)
+ {
+ uint src = (uint)idx + 256u;
+
+ Debug.Assert(256u <= src && src < 512u);
+
+ src = (src << 1) + 1u;
+
+ uint aux = (1u << 19) / src;
+
+ uint dst = (aux + 1u) >> 1;
+
+ Debug.Assert(256u <= dst && dst < 512u);
+
+ tbl[idx] = (byte)(dst - 256u);
+ }
+
+ return tbl;
+ }
+
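+ // Builds the 384-entry reciprocal square root estimate table following the
+ // Arm ARM RecipSqrtEstimate() pseudocode; indices in [128, 256) and [256, 512)
+ // cover odd and even source exponents, and the loop finds the largest aux
+ // with src * aux^2 < 2^28, i.e. aux ~= sqrt(2^28 / src).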
+ private static byte[] BuildRecipSqrtEstimateTable()
+ {
+ byte[] tbl = new byte[384];
+
+ for (int idx = 0; idx < 384; idx++)
+ {
+ uint src = (uint)idx + 128u;
+
+ Debug.Assert(128u <= src && src < 512u);
+
+ if (src < 256u)
+ {
+ src = (src << 1) + 1u;
+ }
+ else
+ {
+ src = (src >> 1) << 1;
+ src = (src + 1u) << 1;
+ }
+
+ uint aux = 512u;
+
+ while (src * (aux + 1u) * (aux + 1u) < (1u << 28))
+ {
+ aux = aux + 1u;
+ }
+
+ uint dst = (aux + 1u) >> 1;
+
+ Debug.Assert(256u <= dst && dst < 512u);
+
+ tbl[idx] = (byte)(dst - 256u);
+ }
+
+ return tbl;
+ }
+
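+ // Records a floating-point exception: if the matching trap enable bit in
+ // FPCR (exception bit + 8) is set, a trap would be required (unimplemented);
+ // otherwise the cumulative status flag is set in FPSR.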
+ public static void FPProcessException(FPException exc, ExecutionContext context)
+ {
+ FPProcessException(exc, context, context.Fpcr);
+ }
+
+ public static void FPProcessException(FPException exc, ExecutionContext context, FPCR fpcr)
+ {
+ int enable = (int)exc + 8;
+
+ if ((fpcr & (FPCR)(1 << enable)) != 0)
+ {
+ throw new NotImplementedException("Floating-point trap handling.");
+ }
+ else
+ {
+ context.Fpsr |= (FPSR)(1 << (int)exc);
+ }
+ }
+
+ public static FPRoundingMode GetRoundingMode(this FPCR fpcr)
+ {
+ const int RModeShift = 22;
+
+ return (FPRoundingMode)(((uint)fpcr >> RModeShift) & 3u);
+ }
+ }
+
+ static class SoftFloat16
+ {
+ public static ushort FPDefaultNaN()
+ {
+ return (ushort)0x7E00u;
+ }
+
+ public static ushort FPInfinity(bool sign)
+ {
+ return sign ? (ushort)0xFC00u : (ushort)0x7C00u;
+ }
+
+ public static ushort FPZero(bool sign)
+ {
+ return sign ? (ushort)0x8000u : (ushort)0x0000u;
+ }
+
+ public static ushort FPMaxNormal(bool sign)
+ {
+ return sign ? (ushort)0xFBFFu : (ushort)0x7BFFu;
+ }
+
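+ // Unpacks an IEEE 754 half-precision value (1 sign, 5 exponent, 10 fraction
+ // bits) into a real number; with FPCR.AHP set, exponent 0x1F encodes normal
+ // numbers instead of infinity/NaN.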
+ public static double FPUnpackCv(
+ this ushort valueBits,
+ out FPType type,
+ out bool sign,
+ ExecutionContext context)
+ {
+ sign = (~(uint)valueBits & 0x8000u) == 0u;
+
+ uint exp16 = ((uint)valueBits & 0x7C00u) >> 10;
+ uint frac16 = (uint)valueBits & 0x03FFu;
+
+ double real;
+
+ if (exp16 == 0u)
+ {
+ if (frac16 == 0u)
+ {
+ type = FPType.Zero;
+ real = 0d;
+ }
+ else
+ {
+ type = FPType.Nonzero; // Subnormal.
+ real = Math.Pow(2d, -14) * ((double)frac16 * Math.Pow(2d, -10));
+ }
+ }
+ else if (exp16 == 0x1Fu && (context.Fpcr & FPCR.Ahp) == 0)
+ {
+ if (frac16 == 0u)
+ {
+ type = FPType.Infinity;
+ real = Math.Pow(2d, 1000);
+ }
+ else
+ {
+ type = (~frac16 & 0x0200u) == 0u ? FPType.QNaN : FPType.SNaN;
+ real = 0d;
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Normal.
+ real = Math.Pow(2d, (int)exp16 - 15) * (1d + (double)frac16 * Math.Pow(2d, -10));
+ }
+
+ return sign ? -real : real;
+ }
+
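+ // Rounds a real value to half precision per the FPCR rounding mode, raising
+ // Underflow/Overflow/Inexact as needed; with FPCR.AHP set, overflow produces
+ // 0x7FFF/0xFFFF and InvalidOp instead of infinity.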
+ public static ushort FPRoundCv(double real, ExecutionContext context)
+ {
+ const int minimumExp = -14;
+
+ const int e = 5;
+ const int f = 10;
+
+ bool sign;
+ double mantissa;
+
+ if (real < 0d)
+ {
+ sign = true;
+ mantissa = -real;
+ }
+ else
+ {
+ sign = false;
+ mantissa = real;
+ }
+
+ int exponent = 0;
+
+ while (mantissa < 1d)
+ {
+ mantissa *= 2d;
+ exponent--;
+ }
+
+ while (mantissa >= 2d)
+ {
+ mantissa /= 2d;
+ exponent++;
+ }
+
+ uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0);
+
+ if (biasedExp == 0u)
+ {
+ mantissa /= Math.Pow(2d, minimumExp - exponent);
+ }
+
+ uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f));
+ double error = mantissa * Math.Pow(2d, f) - (double)intMant;
+
+ if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0))
+ {
+ SoftFloat.FPProcessException(FPException.Underflow, context);
+ }
+
+ bool overflowToInf;
+ bool roundUp;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest:
+ roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u));
+ overflowToInf = true;
+ break;
+
+ case FPRoundingMode.TowardsPlusInfinity:
+ roundUp = (error != 0d && !sign);
+ overflowToInf = !sign;
+ break;
+
+ case FPRoundingMode.TowardsMinusInfinity:
+ roundUp = (error != 0d && sign);
+ overflowToInf = sign;
+ break;
+
+ case FPRoundingMode.TowardsZero:
+ roundUp = false;
+ overflowToInf = false;
+ break;
+ }
+
+ if (roundUp)
+ {
+ intMant++;
+
+ if (intMant == 1u << f)
+ {
+ biasedExp = 1u;
+ }
+
+ if (intMant == 1u << (f + 1))
+ {
+ biasedExp++;
+ intMant >>= 1;
+ }
+ }
+
+ ushort resultBits;
+
+ if ((context.Fpcr & FPCR.Ahp) == 0)
+ {
+ if (biasedExp >= (1u << e) - 1u)
+ {
+ resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ SoftFloat.FPProcessException(FPException.Overflow, context);
+
+ error = 1d;
+ }
+ else
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu));
+ }
+ }
+ else
+ {
+ if (biasedExp >= 1u << e)
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+
+ error = 0d;
+ }
+ else
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu));
+ }
+ }
+
+ if (error != 0d)
+ {
+ SoftFloat.FPProcessException(FPException.Inexact, context);
+ }
+
+ return resultBits;
+ }
+ }
+
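+ // The SoftFloatA_B classes below convert from A-bit to B-bit precision;
+ // SoftFloat32 and SoftFloat64 implement the arithmetic helpers themselves.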
+ static class SoftFloat16_32
+ {
+ public static float FPConvert(ushort valueBits)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ if ((context.Fpcr & FPCR.Dn) != 0)
+ {
+ result = SoftFloat32.FPDefaultNaN();
+ }
+ else
+ {
+ result = FPConvertNaN(valueBits);
+ }
+
+ if (type == FPType.SNaN)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = SoftFloat32.FPInfinity(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = SoftFloat32.FPZero(sign);
+ }
+ else
+ {
+ result = FPRoundCv(real, context);
+ }
+
+ return result;
+ }
+
+ private static float FPRoundCv(double real, ExecutionContext context)
+ {
+ const int minimumExp = -126;
+
+ const int e = 8;
+ const int f = 23;
+
+ bool sign;
+ double mantissa;
+
+ if (real < 0d)
+ {
+ sign = true;
+ mantissa = -real;
+ }
+ else
+ {
+ sign = false;
+ mantissa = real;
+ }
+
+ int exponent = 0;
+
+ while (mantissa < 1d)
+ {
+ mantissa *= 2d;
+ exponent--;
+ }
+
+ while (mantissa >= 2d)
+ {
+ mantissa /= 2d;
+ exponent++;
+ }
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp)
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ return SoftFloat32.FPZero(sign);
+ }
+
+ uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0);
+
+ if (biasedExp == 0u)
+ {
+ mantissa /= Math.Pow(2d, minimumExp - exponent);
+ }
+
+ uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f));
+ double error = mantissa * Math.Pow(2d, f) - (double)intMant;
+
+ if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0))
+ {
+ SoftFloat.FPProcessException(FPException.Underflow, context);
+ }
+
+ bool overflowToInf;
+ bool roundUp;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest:
+ roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u));
+ overflowToInf = true;
+ break;
+
+ case FPRoundingMode.TowardsPlusInfinity:
+ roundUp = (error != 0d && !sign);
+ overflowToInf = !sign;
+ break;
+
+ case FPRoundingMode.TowardsMinusInfinity:
+ roundUp = (error != 0d && sign);
+ overflowToInf = sign;
+ break;
+
+ case FPRoundingMode.TowardsZero:
+ roundUp = false;
+ overflowToInf = false;
+ break;
+ }
+
+ if (roundUp)
+ {
+ intMant++;
+
+ if (intMant == 1u << f)
+ {
+ biasedExp = 1u;
+ }
+
+ if (intMant == 1u << (f + 1))
+ {
+ biasedExp++;
+ intMant >>= 1;
+ }
+ }
+
+ float result;
+
+ if (biasedExp >= (1u << e) - 1u)
+ {
+ result = overflowToInf ? SoftFloat32.FPInfinity(sign) : SoftFloat32.FPMaxNormal(sign);
+
+ SoftFloat.FPProcessException(FPException.Overflow, context);
+
+ error = 1d;
+ }
+ else
+ {
+ result = BitConverter.Int32BitsToSingle(
+ (int)((sign ? 1u : 0u) << 31 | (biasedExp & 0xFFu) << 23 | (intMant & 0x007FFFFFu)));
+ }
+
+ if (error != 0d)
+ {
+ SoftFloat.FPProcessException(FPException.Inexact, context);
+ }
+
+ return result;
+ }
+
+ private static float FPConvertNaN(ushort valueBits)
+ {
+ return BitConverter.Int32BitsToSingle(
+ (int)(((uint)valueBits & 0x8000u) << 16 | 0x7FC00000u | ((uint)valueBits & 0x01FFu) << 13));
+ }
+ }
+
+ static class SoftFloat16_64
+ {
+ public static double FPConvert(ushort valueBits)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ if ((context.Fpcr & FPCR.Dn) != 0)
+ {
+ result = SoftFloat64.FPDefaultNaN();
+ }
+ else
+ {
+ result = FPConvertNaN(valueBits);
+ }
+
+ if (type == FPType.SNaN)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = SoftFloat64.FPInfinity(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = SoftFloat64.FPZero(sign);
+ }
+ else
+ {
+ result = FPRoundCv(real, context);
+ }
+
+ return result;
+ }
+
+ private static double FPRoundCv(double real, ExecutionContext context)
+ {
+ const int minimumExp = -1022;
+
+ const int e = 11;
+ const int f = 52;
+
+ bool sign;
+ double mantissa;
+
+ if (real < 0d)
+ {
+ sign = true;
+ mantissa = -real;
+ }
+ else
+ {
+ sign = false;
+ mantissa = real;
+ }
+
+ int exponent = 0;
+
+ while (mantissa < 1d)
+ {
+ mantissa *= 2d;
+ exponent--;
+ }
+
+ while (mantissa >= 2d)
+ {
+ mantissa /= 2d;
+ exponent++;
+ }
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp)
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ return SoftFloat64.FPZero(sign);
+ }
+
+ uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0);
+
+ if (biasedExp == 0u)
+ {
+ mantissa /= Math.Pow(2d, minimumExp - exponent);
+ }
+
+ ulong intMant = (ulong)Math.Floor(mantissa * Math.Pow(2d, f));
+ double error = mantissa * Math.Pow(2d, f) - (double)intMant;
+
+ if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0))
+ {
+ SoftFloat.FPProcessException(FPException.Underflow, context);
+ }
+
+ bool overflowToInf;
+ bool roundUp;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest:
+ roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u));
+ overflowToInf = true;
+ break;
+
+ case FPRoundingMode.TowardsPlusInfinity:
+ roundUp = (error != 0d && !sign);
+ overflowToInf = !sign;
+ break;
+
+ case FPRoundingMode.TowardsMinusInfinity:
+ roundUp = (error != 0d && sign);
+ overflowToInf = sign;
+ break;
+
+ case FPRoundingMode.TowardsZero:
+ roundUp = false;
+ overflowToInf = false;
+ break;
+ }
+
+ if (roundUp)
+ {
+ intMant++;
+
+ if (intMant == 1ul << f)
+ {
+ biasedExp = 1u;
+ }
+
+ if (intMant == 1ul << (f + 1))
+ {
+ biasedExp++;
+ intMant >>= 1;
+ }
+ }
+
+ double result;
+
+ if (biasedExp >= (1u << e) - 1u)
+ {
+ result = overflowToInf ? SoftFloat64.FPInfinity(sign) : SoftFloat64.FPMaxNormal(sign);
+
+ SoftFloat.FPProcessException(FPException.Overflow, context);
+
+ error = 1d;
+ }
+ else
+ {
+ result = BitConverter.Int64BitsToDouble(
+ (long)((sign ? 1ul : 0ul) << 63 | (biasedExp & 0x7FFul) << 52 | (intMant & 0x000FFFFFFFFFFFFFul)));
+ }
+
+ if (error != 0d)
+ {
+ SoftFloat.FPProcessException(FPException.Inexact, context);
+ }
+
+ return result;
+ }
+
+ private static double FPConvertNaN(ushort valueBits)
+ {
+ return BitConverter.Int64BitsToDouble(
+ (long)(((ulong)valueBits & 0x8000ul) << 48 | 0x7FF8000000000000ul | ((ulong)valueBits & 0x01FFul) << 42));
+ }
+ }
+
+ static class SoftFloat32_16
+ {
+ public static ushort FPConvert(float value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ double real = value.FPUnpackCv(out FPType type, out bool sign, out uint valueBits, context);
+
+ bool altHp = (context.Fpcr & FPCR.Ahp) != 0;
+
+ ushort resultBits;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ if (altHp)
+ {
+ resultBits = SoftFloat16.FPZero(sign);
+ }
+ else if ((context.Fpcr & FPCR.Dn) != 0)
+ {
+ resultBits = SoftFloat16.FPDefaultNaN();
+ }
+ else
+ {
+ resultBits = FPConvertNaN(valueBits);
+ }
+
+ if (type == FPType.SNaN || altHp)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else if (type == FPType.Infinity)
+ {
+ if (altHp)
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+ }
+ else
+ {
+ resultBits = SoftFloat16.FPInfinity(sign);
+ }
+ }
+ else if (type == FPType.Zero)
+ {
+ resultBits = SoftFloat16.FPZero(sign);
+ }
+ else
+ {
+ resultBits = SoftFloat16.FPRoundCv(real, context);
+ }
+
+ return resultBits;
+ }
+
+ private static double FPUnpackCv(
+ this float value,
+ out FPType type,
+ out bool sign,
+ out uint valueBits,
+ ExecutionContext context)
+ {
+ valueBits = (uint)BitConverter.SingleToInt32Bits(value);
+
+ sign = (~valueBits & 0x80000000u) == 0u;
+
+ uint exp32 = (valueBits & 0x7F800000u) >> 23;
+ uint frac32 = valueBits & 0x007FFFFFu;
+
+ double real;
+
+ if (exp32 == 0u)
+ {
+ if (frac32 == 0u || (context.Fpcr & FPCR.Fz) != 0)
+ {
+ type = FPType.Zero;
+ real = 0d;
+
+ if (frac32 != 0u)
+ {
+ SoftFloat.FPProcessException(FPException.InputDenorm, context);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Subnormal.
+ real = Math.Pow(2d, -126) * ((double)frac32 * Math.Pow(2d, -23));
+ }
+ }
+ else if (exp32 == 0xFFu)
+ {
+ if (frac32 == 0u)
+ {
+ type = FPType.Infinity;
+ real = Math.Pow(2d, 1000);
+ }
+ else
+ {
+ type = (~frac32 & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN;
+ real = 0d;
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Normal.
+ real = Math.Pow(2d, (int)exp32 - 127) * (1d + (double)frac32 * Math.Pow(2d, -23));
+ }
+
+ return sign ? -real : real;
+ }
+
+ private static ushort FPConvertNaN(uint valueBits)
+ {
+ return (ushort)((valueBits & 0x80000000u) >> 16 | 0x7E00u | (valueBits & 0x003FE000u) >> 13);
+ }
+ }
+
+ static class SoftFloat32
+ {
+ public static float FPAdd(float value1, float value2)
+ {
+ return FPAddFpscr(value1, value2, false);
+ }
+
+ public static float FPAddFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == !sign2)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((inf1 && !sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == sign2)
+ {
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = value1 + value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
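+ // Returns the NZCV flag nibble used by the Arm FPCompare pseudocode:
+ // 0b0011 unordered, 0b0110 equal, 0b1000 less than, 0b0010 greater than.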
+ public static int FPCompare(float value1, float value2, bool signalNaNs)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context, fpcr);
+
+ int result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = 0b0011;
+
+ if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ }
+ else
+ {
+ if (value1 == value2)
+ {
+ result = 0b0110;
+ }
+ else if (value1 < value2)
+ {
+ result = 0b1000;
+ }
+ else
+ {
+ result = 0b0010;
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPCompareEQ(float value1, float value2)
+ {
+ return FPCompareEQFpscr(value1, value2, false);
+ }
+
+ public static float FPCompareEQFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ float result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ if (type1 == FPType.SNaN || type2 == FPType.SNaN)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 == value2);
+ }
+
+ return result;
+ }
+
+ public static float FPCompareGE(float value1, float value2)
+ {
+ return FPCompareGEFpscr(value1, value2, false);
+ }
+
+ public static float FPCompareGEFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ float result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 >= value2);
+ }
+
+ return result;
+ }
+
+ public static float FPCompareGT(float value1, float value2)
+ {
+ return FPCompareGTFpscr(value1, value2, false);
+ }
+
+ public static float FPCompareGTFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ float result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 > value2);
+ }
+
+ return result;
+ }
+
+ public static float FPCompareLE(float value1, float value2)
+ {
+ return FPCompareGE(value2, value1);
+ }
+
+ public static float FPCompareLT(float value1, float value2)
+ {
+ return FPCompareGT(value2, value1);
+ }
+
+ public static float FPCompareLEFpscr(float value1, float value2, bool standardFpscr)
+ {
+ return FPCompareGEFpscr(value2, value1, standardFpscr);
+ }
+
+ public static float FPCompareLTFpscr(float value1, float value2, bool standardFpscr)
+ {
+ return FPCompareGTFpscr(value2, value1, standardFpscr);
+ }
+
+ public static float FPDiv(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && inf2) || (zero1 && zero2))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if (inf1 || zero2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+
+ if (!inf1)
+ {
+ SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr);
+ }
+ }
+ else if (zero1 || inf2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 / value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPMax(float value1, float value2)
+ {
+ return FPMaxFpscr(value1, value2, false);
+ }
+
+ public static float FPMaxFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ if (value1 > value2)
+ {
+ if (type1 == FPType.Infinity)
+ {
+ result = FPInfinity(sign1);
+ }
+ else if (type1 == FPType.Zero)
+ {
+ result = FPZero(sign1 && sign2);
+ }
+ else
+ {
+ result = value1;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+ else
+ {
+ if (type2 == FPType.Infinity)
+ {
+ result = FPInfinity(sign2);
+ }
+ else if (type2 == FPType.Zero)
+ {
+ result = FPZero(sign1 && sign2);
+ }
+ else
+ {
+ result = value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPMaxNum(float value1, float value2)
+ {
+ return FPMaxNumFpscr(value1, value2, false);
+ }
+
+ public static float FPMaxNumFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ if (type1 == FPType.QNaN && type2 != FPType.QNaN)
+ {
+ value1 = FPInfinity(true);
+ }
+ else if (type1 != FPType.QNaN && type2 == FPType.QNaN)
+ {
+ value2 = FPInfinity(true);
+ }
+
+ return FPMaxFpscr(value1, value2, standardFpscr);
+ }
+
+ public static float FPMin(float value1, float value2)
+ {
+ return FPMinFpscr(value1, value2, false);
+ }
+
+ public static float FPMinFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ if (value1 < value2)
+ {
+ if (type1 == FPType.Infinity)
+ {
+ result = FPInfinity(sign1);
+ }
+ else if (type1 == FPType.Zero)
+ {
+ result = FPZero(sign1 || sign2);
+ }
+ else
+ {
+ result = value1;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+ else
+ {
+ if (type2 == FPType.Infinity)
+ {
+ result = FPInfinity(sign2);
+ }
+ else if (type2 == FPType.Zero)
+ {
+ result = FPZero(sign1 || sign2);
+ }
+ else
+ {
+ result = value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPMinNum(float value1, float value2)
+ {
+ return FPMinNumFpscr(value1, value2, false);
+ }
+
+ public static float FPMinNumFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ if (type1 == FPType.QNaN && type2 != FPType.QNaN)
+ {
+ value1 = FPInfinity(false);
+ }
+ else if (type1 != FPType.QNaN && type2 == FPType.QNaN)
+ {
+ value2 = FPInfinity(false);
+ }
+
+ return FPMinFpscr(value1, value2, standardFpscr);
+ }
+
+ public static float FPMul(float value1, float value2)
+ {
+ return FPMulFpscr(value1, value2, false);
+ }
+
+ public static float FPMulFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else if (zero1 || zero2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 * value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPMulAdd(float valueA, float value1, float value2)
+ {
+ return FPMulAddFpscr(valueA, value1, value2, false);
+ }
+
+ public static float FPMulAddFpscr(float valueA, float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context, fpcr);
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context, fpcr);
+
+ if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2)))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+
+ if (!done)
+ {
+ bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero;
+
+ bool signP = sign1 ^ sign2;
+ bool infP = inf1 || inf2;
+ bool zeroP = zero1 || zero2;
+
+ if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((infA && !signA) || (infP && !signP))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((infA && signA) || (infP && signP))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zeroA && zeroP && signA == signP)
+ {
+ result = FPZero(signA);
+ }
+ else
+ {
+ result = MathF.FusedMultiplyAdd(value1, value2, valueA);
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPMulSub(float valueA, float value1, float value2)
+ {
+ value1 = value1.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
+ public static float FPMulSubFpscr(float valueA, float value1, float value2, bool standardFpscr)
+ {
+ value1 = value1.FPNeg();
+
+ return FPMulAddFpscr(valueA, value1, value2, standardFpscr);
+ }
+
+ public static float FPMulX(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPTwo(sign1 ^ sign2);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else if (zero1 || zero2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 * value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPNegMulAdd(float valueA, float value1, float value2)
+ {
+ valueA = valueA.FPNeg();
+ value1 = value1.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
+ public static float FPNegMulSub(float valueA, float value1, float value2)
+ {
+ valueA = valueA.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
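+ // Reciprocal estimate following the Arm ARM FPRecipEstimate() pseudocode:
+ // special cases aside, the input fraction is normalized, its top 8 bits index
+ // RecipEstimateTable, and the result exponent is 253 - exp (with subnormal
+ // results denormalized by one or two bits).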
+ public static float FPRecipEstimate(float value)
+ {
+ return FPRecipEstimateFpscr(value, false);
+ }
+
+ public static float FPRecipEstimateFpscr(float value, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr);
+ }
+ else if (MathF.Abs(value) < MathF.Pow(2f, -128))
+ {
+ bool overflowToInf;
+
+ switch (fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest: overflowToInf = true; break;
+ case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break;
+ case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break;
+ case FPRoundingMode.TowardsZero: overflowToInf = false; break;
+ }
+
+ result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ SoftFloat.FPProcessException(FPException.Overflow, context, fpcr);
+ SoftFloat.FPProcessException(FPException.Inexact, context, fpcr);
+ }
+ else if ((fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126)))
+ {
+ result = FPZero(sign);
+
+ context.Fpsr |= FPSR.Ufc;
+ }
+ else
+ {
+ ulong fraction = (ulong)(op & 0x007FFFFFu) << 29;
+ uint exp = (op & 0x7F800000u) >> 23;
+
+ if (exp == 0u)
+ {
+ if ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+ exp -= 1u;
+ }
+ else
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+ }
+
+ uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+
+ uint resultExp = 253u - exp;
+
+ uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u;
+
+ fraction = (ulong)(estimate & 0xFFu) << 44;
+
+ if (resultExp == 0u)
+ {
+ fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
+ }
+ else if (resultExp + 1u == 0u)
+ {
+ fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
+ resultExp = 0u;
+ }
+
+ result = BitConverter.Int32BitsToSingle(
+ (int)((sign ? 1u : 0u) << 31 | (resultExp & 0xFFu) << 23 | (uint)(fraction >> 29) & 0x007FFFFFu));
+ }
+
+ return result;
+ }
+
+ public static float FPRecipStep(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.StandardFpcrValue;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ float product;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ product = FPZero(false);
+ }
+ else
+ {
+ product = FPMulFpscr(value1, value2, true);
+ }
+
+ result = FPSubFpscr(FPTwo(false), product, true);
+ }
+
+ return result;
+ }
+
+ public static float FPRecipStepFused(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPNeg();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPTwo(false);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else
+ {
+ result = MathF.FusedMultiplyAdd(value1, value2, 2f);
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
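+ // FRECPX reciprocal exponent estimate: zeroes the fraction and replaces the
+ // exponent with its bitwise complement (clamped to 0xFE when the source
+ // exponent is zero).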
+ public static float FPRecpX(float value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else
+ {
+ uint notExp = (~op >> 23) & 0xFFu;
+ uint maxExp = 0xFEu;
+
+ result = BitConverter.Int32BitsToSingle(
+ (int)((sign ? 1u : 0u) << 31 | (notExp == 0xFFu ? maxExp : notExp) << 23));
+ }
+
+ return result;
+ }
+
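+ // Reciprocal square root estimate following the Arm ARM FPRSqrtEstimate()
+ // pseudocode: the fraction is normalized, scaled differently for even and odd
+ // exponents, and the 9-bit index selects an entry from RecipSqrtEstimateTable;
+ // the result exponent is (380 - exp) / 2.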
+ public static float FPRSqrtEstimate(float value)
+ {
+ return FPRSqrtEstimateFpscr(value, false);
+ }
+
+ public static float FPRSqrtEstimateFpscr(float value, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr);
+ }
+ else if (sign)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(false);
+ }
+ else
+ {
+ ulong fraction = (ulong)(op & 0x007FFFFFu) << 29;
+ uint exp = (op & 0x7F800000u) >> 23;
+
+ if (exp == 0u)
+ {
+ while ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ exp -= 1u;
+ }
+
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+
+ uint scaled;
+
+ if ((exp & 1u) == 0u)
+ {
+ scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+ }
+ else
+ {
+ scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
+ }
+
+ uint resultExp = (380u - exp) >> 1;
+
+ uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;
+
+ result = BitConverter.Int32BitsToSingle((int)((resultExp & 0xFFu) << 23 | (estimate & 0xFFu) << 15));
+ }
+
+ return result;
+ }
+
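+ // Computes (value1 - value2) / 2 with the usual special-case handling; used
+ // by FPRSqrtStep to form (3 - product) / 2.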
+ public static float FPHalvedSub(float value1, float value2, ExecutionContext context, FPCR fpcr)
+ {
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == sign2)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((inf1 && !sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == !sign2)
+ {
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = (value1 - value2) / 2.0f;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPRSqrtStep(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.StandardFpcrValue;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ float product;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ product = FPZero(false);
+ }
+ else
+ {
+ product = FPMulFpscr(value1, value2, true);
+ }
+
+ result = FPHalvedSub(FPThree(false), product, context, fpcr);
+ }
+
+ return result;
+ }
+
+ public static float FPRSqrtStepFused(float value1, float value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPNeg();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPOnePointFive(false);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else
+ {
+ result = MathF.FusedMultiplyAdd(value1, value2, 3f) / 2f;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPSqrt(float value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value = value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FPType.Infinity && !sign)
+ {
+ result = FPInfinity(sign);
+ }
+ else if (sign)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else
+ {
+ result = MathF.Sqrt(value);
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPSub(float value1, float value2)
+ {
+ return FPSubFpscr(value1, value2, false);
+ }
+
+ public static float FPSubFpscr(float value1, float value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr);
+
+ float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == sign2)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((inf1 && !sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == !sign2)
+ {
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = value1 - value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0f);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static float FPDefaultNaN()
+ {
+ return BitConverter.Int32BitsToSingle(0x7fc00000);
+ }
+
+ public static float FPInfinity(bool sign)
+ {
+ return sign ? float.NegativeInfinity : float.PositiveInfinity;
+ }
+
+ public static float FPZero(bool sign)
+ {
+ return sign ? -0f : +0f;
+ }
+
+ public static float FPMaxNormal(bool sign)
+ {
+ return sign ? float.MinValue : float.MaxValue;
+ }
+
+ private static float FPTwo(bool sign)
+ {
+ return sign ? -2f : +2f;
+ }
+
+ private static float FPThree(bool sign)
+ {
+ return sign ? -3f : +3f;
+ }
+
+ private static float FPOnePointFive(bool sign)
+ {
+ return sign ? -1.5f : +1.5f;
+ }
+
+ private static float FPNeg(this float value)
+ {
+ return -value;
+ }
+
+ private static float ZerosOrOnes(bool ones)
+ {
+ return BitConverter.Int32BitsToSingle(ones ? -1 : 0);
+ }
+
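+ // Unpacks a single-precision value into type/sign/raw bits; with FPCR.FZ set,
+ // subnormal inputs are flushed to zero and InputDenorm is raised.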
+ private static float FPUnpack(
+ this float value,
+ out FPType type,
+ out bool sign,
+ out uint valueBits,
+ ExecutionContext context,
+ FPCR fpcr)
+ {
+ valueBits = (uint)BitConverter.SingleToInt32Bits(value);
+
+ sign = (~valueBits & 0x80000000u) == 0u;
+
+ if ((valueBits & 0x7F800000u) == 0u)
+ {
+ if ((valueBits & 0x007FFFFFu) == 0u || (fpcr & FPCR.Fz) != 0)
+ {
+ type = FPType.Zero;
+ value = FPZero(sign);
+
+ if ((valueBits & 0x007FFFFFu) != 0u)
+ {
+ SoftFloat.FPProcessException(FPException.InputDenorm, context, fpcr);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero;
+ }
+ }
+ else if ((~valueBits & 0x7F800000u) == 0u)
+ {
+ if ((valueBits & 0x007FFFFFu) == 0u)
+ {
+ type = FPType.Infinity;
+ }
+ else
+ {
+ type = (~valueBits & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN;
+ value = FPZero(sign);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero;
+ }
+
+ return value;
+ }
+
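+ // NaN propagation per the Arm ARM FPProcessNaNs() pseudocode: the first SNaN
+ // operand wins, then the first QNaN; done is false when neither operand is a
+ // NaN.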
+ private static float FPProcessNaNs(
+ FPType type1,
+ FPType type2,
+ uint op1,
+ uint op2,
+ out bool done,
+ ExecutionContext context,
+ FPCR fpcr)
+ {
+ done = true;
+
+ if (type1 == FPType.SNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.SNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+ else if (type1 == FPType.QNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.QNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+
+ done = false;
+
+ return FPZero(false);
+ }
+
+ private static float FPProcessNaNs3(
+ FPType type1,
+ FPType type2,
+ FPType type3,
+ uint op1,
+ uint op2,
+ uint op3,
+ out bool done,
+ ExecutionContext context,
+ FPCR fpcr)
+ {
+ done = true;
+
+ if (type1 == FPType.SNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.SNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+ else if (type3 == FPType.SNaN)
+ {
+ return FPProcessNaN(type3, op3, context, fpcr);
+ }
+ else if (type1 == FPType.QNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.QNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+ else if (type3 == FPType.QNaN)
+ {
+ return FPProcessNaN(type3, op3, context, fpcr);
+ }
+
+ done = false;
+
+ return FPZero(false);
+ }
+
+ private static float FPProcessNaN(FPType type, uint op, ExecutionContext context, FPCR fpcr)
+ {
+ if (type == FPType.SNaN)
+ {
+ op |= 1u << 22;
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+
+ if ((fpcr & FPCR.Dn) != 0)
+ {
+ return FPDefaultNaN();
+ }
+
+ return BitConverter.Int32BitsToSingle((int)op);
+ }
+ }
+
+ static class SoftFloat64_16
+ {
+ public static ushort FPConvert(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ double real = value.FPUnpackCv(out FPType type, out bool sign, out ulong valueBits, context);
+
+ bool altHp = (context.Fpcr & FPCR.Ahp) != 0;
+
+ ushort resultBits;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ if (altHp)
+ {
+ resultBits = SoftFloat16.FPZero(sign);
+ }
+ else if ((context.Fpcr & FPCR.Dn) != 0)
+ {
+ resultBits = SoftFloat16.FPDefaultNaN();
+ }
+ else
+ {
+ resultBits = FPConvertNaN(valueBits);
+ }
+
+ if (type == FPType.SNaN || altHp)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else if (type == FPType.Infinity)
+ {
+ if (altHp)
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context);
+ }
+ else
+ {
+ resultBits = SoftFloat16.FPInfinity(sign);
+ }
+ }
+ else if (type == FPType.Zero)
+ {
+ resultBits = SoftFloat16.FPZero(sign);
+ }
+ else
+ {
+ resultBits = SoftFloat16.FPRoundCv(real, context);
+ }
+
+ return resultBits;
+ }
+
+ private static double FPUnpackCv(
+ this double value,
+ out FPType type,
+ out bool sign,
+ out ulong valueBits,
+ ExecutionContext context)
+ {
+ valueBits = (ulong)BitConverter.DoubleToInt64Bits(value);
+
+ sign = (~valueBits & 0x8000000000000000ul) == 0u;
+
+ ulong exp64 = (valueBits & 0x7FF0000000000000ul) >> 52;
+ ulong frac64 = valueBits & 0x000FFFFFFFFFFFFFul;
+
+ double real;
+
+ if (exp64 == 0u)
+ {
+ if (frac64 == 0u || (context.Fpcr & FPCR.Fz) != 0)
+ {
+ type = FPType.Zero;
+ real = 0d;
+
+ if (frac64 != 0u)
+ {
+ SoftFloat.FPProcessException(FPException.InputDenorm, context);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Subnormal.
+ real = Math.Pow(2d, -1022) * ((double)frac64 * Math.Pow(2d, -52));
+ }
+ }
+ else if (exp64 == 0x7FFul)
+ {
+ if (frac64 == 0u)
+ {
+ type = FPType.Infinity;
+ real = Math.Pow(2d, 1000000);
+ }
+ else
+ {
+ type = (~frac64 & 0x0008000000000000ul) == 0u ? FPType.QNaN : FPType.SNaN;
+ real = 0d;
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Normal.
+ real = Math.Pow(2d, (int)exp64 - 1023) * (1d + (double)frac64 * Math.Pow(2d, -52));
+ }
+
+ return sign ? -real : real;
+ }
+
+ private static ushort FPConvertNaN(ulong valueBits)
+ {
+ return (ushort)((valueBits & 0x8000000000000000ul) >> 48 | 0x7E00u | (valueBits & 0x0007FC0000000000ul) >> 42);
+ }
+ }
+
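+ // Double-precision counterparts of the SoftFloat32 helpers.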
+ static class SoftFloat64
+ {
+ public static double FPAdd(double value1, double value2)
+ {
+ return FPAddFpscr(value1, value2, false);
+ }
+
+ public static double FPAddFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == !sign2)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((inf1 && !sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == sign2)
+ {
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = value1 + value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static int FPCompare(double value1, double value2, bool signalNaNs)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context, fpcr);
+
+ int result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = 0b0011;
+
+ if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ }
+ else
+ {
+ if (value1 == value2)
+ {
+ result = 0b0110;
+ }
+ else if (value1 < value2)
+ {
+ result = 0b1000;
+ }
+ else
+ {
+ result = 0b0010;
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPCompareEQ(double value1, double value2)
+ {
+ return FPCompareEQFpscr(value1, value2, false);
+ }
+
+ public static double FPCompareEQFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ double result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ if (type1 == FPType.SNaN || type2 == FPType.SNaN)
+ {
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 == value2);
+ }
+
+ return result;
+ }
+
+ public static double FPCompareGE(double value1, double value2)
+ {
+ return FPCompareGEFpscr(value1, value2, false);
+ }
+
+ public static double FPCompareGEFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ double result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 >= value2);
+ }
+
+ return result;
+ }
+
+ public static double FPCompareGT(double value1, double value2)
+ {
+ return FPCompareGTFpscr(value1, value2, false);
+ }
+
+ public static double FPCompareGTFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ double result;
+
+ if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
+ {
+ result = ZerosOrOnes(false);
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 > value2);
+ }
+
+ return result;
+ }
+
+ public static double FPCompareLE(double value1, double value2)
+ {
+ return FPCompareGE(value2, value1);
+ }
+
+ public static double FPCompareLT(double value1, double value2)
+ {
+ return FPCompareGT(value2, value1);
+ }
+
+ public static double FPCompareLEFpscr(double value1, double value2, bool standardFpscr)
+ {
+ return FPCompareGEFpscr(value2, value1, standardFpscr);
+ }
+
+ public static double FPCompareLTFpscr(double value1, double value2, bool standardFpscr)
+ {
+ return FPCompareGTFpscr(value2, value1, standardFpscr);
+ }
+
+ public static double FPDiv(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && inf2) || (zero1 && zero2))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if (inf1 || zero2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+
+ if (!inf1)
+ {
+ SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr);
+ }
+ }
+ else if (zero1 || inf2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 / value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPMax(double value1, double value2)
+ {
+ return FPMaxFpscr(value1, value2, false);
+ }
+
+ public static double FPMaxFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ if (value1 > value2)
+ {
+ if (type1 == FPType.Infinity)
+ {
+ result = FPInfinity(sign1);
+ }
+ else if (type1 == FPType.Zero)
+ {
+ result = FPZero(sign1 && sign2);
+ }
+ else
+ {
+ result = value1;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+ else
+ {
+ if (type2 == FPType.Infinity)
+ {
+ result = FPInfinity(sign2);
+ }
+ else if (type2 == FPType.Zero)
+ {
+ result = FPZero(sign1 && sign2);
+ }
+ else
+ {
+ result = value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPMaxNum(double value1, double value2)
+ {
+ return FPMaxNumFpscr(value1, value2, false);
+ }
+
+ public static double FPMaxNumFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ if (type1 == FPType.QNaN && type2 != FPType.QNaN)
+ {
+ value1 = FPInfinity(true);
+ }
+ else if (type1 != FPType.QNaN && type2 == FPType.QNaN)
+ {
+ value2 = FPInfinity(true);
+ }
+
+ return FPMaxFpscr(value1, value2, standardFpscr);
+ }
+
+ public static double FPMin(double value1, double value2)
+ {
+ return FPMinFpscr(value1, value2, false);
+ }
+
+ public static double FPMinFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ if (value1 < value2)
+ {
+ if (type1 == FPType.Infinity)
+ {
+ result = FPInfinity(sign1);
+ }
+ else if (type1 == FPType.Zero)
+ {
+ result = FPZero(sign1 || sign2);
+ }
+ else
+ {
+ result = value1;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+ else
+ {
+ if (type2 == FPType.Infinity)
+ {
+ result = FPInfinity(sign2);
+ }
+ else if (type2 == FPType.Zero)
+ {
+ result = FPZero(sign1 || sign2);
+ }
+ else
+ {
+ result = value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPMinNum(double value1, double value2)
+ {
+ return FPMinNumFpscr(value1, value2, false);
+ }
+
+ public static double FPMinNumFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1.FPUnpack(out FPType type1, out _, out _, context, fpcr);
+ value2.FPUnpack(out FPType type2, out _, out _, context, fpcr);
+
+ if (type1 == FPType.QNaN && type2 != FPType.QNaN)
+ {
+ value1 = FPInfinity(false);
+ }
+ else if (type1 != FPType.QNaN && type2 == FPType.QNaN)
+ {
+ value2 = FPInfinity(false);
+ }
+
+ return FPMinFpscr(value1, value2, standardFpscr);
+ }
+
+ public static double FPMul(double value1, double value2)
+ {
+ return FPMulFpscr(value1, value2, false);
+ }
+
+ public static double FPMulFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else if (zero1 || zero2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 * value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPMulAdd(double valueA, double value1, double value2)
+ {
+ return FPMulAddFpscr(valueA, value1, value2, false);
+ }
+
+ public static double FPMulAddFpscr(double valueA, double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context, fpcr);
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context, fpcr);
+
+ if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2)))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+
+ if (!done)
+ {
+ bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero;
+
+ bool signP = sign1 ^ sign2;
+ bool infP = inf1 || inf2;
+ bool zeroP = zero1 || zero2;
+
+ if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP))
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((infA && !signA) || (infP && !signP))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((infA && signA) || (infP && signP))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zeroA && zeroP && signA == signP)
+ {
+ result = FPZero(signA);
+ }
+ else
+ {
+ result = Math.FusedMultiplyAdd(value1, value2, valueA);
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
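+
+        // Illustrative special cases (a sketch, assuming the default FPCR):
+        //   FPMulAdd(qNaN, 0.0, +Inf) -> default NaN + InvalidOp; the 0 * Inf product
+        //                                is invalid even though the addend is a NaN
+        //   FPMulAdd(+Inf, 1.0, -Inf) -> default NaN + InvalidOp (Inf - Inf)
+        //   FPMulAdd(1.0, 2.0, 3.0)   -> FusedMultiplyAdd(2.0, 3.0, 1.0) == 7.0,
+        //                                with a single rounding step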
+
+ public static double FPMulSub(double valueA, double value1, double value2)
+ {
+ value1 = value1.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
+ public static double FPMulSubFpscr(double valueA, double value1, double value2, bool standardFpscr)
+ {
+ value1 = value1.FPNeg();
+
+ return FPMulAddFpscr(valueA, value1, value2, standardFpscr);
+ }
+
+ public static double FPMulX(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPTwo(sign1 ^ sign2);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else if (zero1 || zero2)
+ {
+ result = FPZero(sign1 ^ sign2);
+ }
+ else
+ {
+ result = value1 * value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
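+
+        // FPMulX differs from FPMul only in the 0 * Inf case: instead of raising
+        // InvalidOp and producing a NaN it returns two with the XOR of the signs,
+        // e.g. FPMulX(0.0, double.NegativeInfinity) == -2.0.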
+
+ public static double FPNegMulAdd(double valueA, double value1, double value2)
+ {
+ valueA = valueA.FPNeg();
+ value1 = value1.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
+ public static double FPNegMulSub(double valueA, double value1, double value2)
+ {
+ valueA = valueA.FPNeg();
+
+ return FPMulAdd(valueA, value1, value2);
+ }
+
+ public static double FPRecipEstimate(double value)
+ {
+ return FPRecipEstimateFpscr(value, false);
+ }
+
+ public static double FPRecipEstimateFpscr(double value, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr);
+ }
+ else if (Math.Abs(value) < Math.Pow(2d, -1024))
+ {
+ bool overflowToInf;
+
+ switch (fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest: overflowToInf = true; break;
+ case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break;
+ case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break;
+ case FPRoundingMode.TowardsZero: overflowToInf = false; break;
+ }
+
+ result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ SoftFloat.FPProcessException(FPException.Overflow, context, fpcr);
+ SoftFloat.FPProcessException(FPException.Inexact, context, fpcr);
+ }
+ else if ((fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022)))
+ {
+ result = FPZero(sign);
+
+ context.Fpsr |= FPSR.Ufc;
+ }
+ else
+ {
+ ulong fraction = op & 0x000FFFFFFFFFFFFFul;
+ uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52);
+
+ if (exp == 0u)
+ {
+ if ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+ exp -= 1u;
+ }
+ else
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+ }
+
+ uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+
+ uint resultExp = 2045u - exp;
+
+ uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u;
+
+ fraction = (ulong)(estimate & 0xFFu) << 44;
+
+ if (resultExp == 0u)
+ {
+ fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
+ }
+ else if (resultExp + 1u == 0u)
+ {
+ fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
+ resultExp = 0u;
+ }
+
+ result = BitConverter.Int64BitsToDouble(
+ (long)((sign ? 1ul : 0ul) << 63 | (resultExp & 0x7FFul) << 52 | (fraction & 0x000FFFFFFFFFFFFFul)));
+ }
+
+ return result;
+ }
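+
+        // Sketch of the estimate encoding above: the top 8 fraction bits (plus the
+        // implicit leading 1) index the 256-entry RecipEstimateTable, the table value
+        // becomes the new top 8 fraction bits, and the exponent is negated around the
+        // double bias: for input ~ 2^(exp-1023) * 1.f with 1.f in [1, 2),
+        //   result ~ 2^(2045-exp-1023) * 1.est = 2^(1022-exp) * 1.est
+        // so input * result stays near 1.0 to roughly 8 bits of precision.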
+
+ public static double FPRecipStep(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.StandardFpcrValue;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ double product;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ product = FPZero(false);
+ }
+ else
+ {
+ product = FPMulFpscr(value1, value2, true);
+ }
+
+ result = FPSubFpscr(FPTwo(false), product, true);
+ }
+
+ return result;
+ }
+
+ public static double FPRecipStepFused(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPNeg();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPTwo(false);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else
+ {
+ result = Math.FusedMultiplyAdd(value1, value2, 2d);
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPRecpX(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else
+ {
+ ulong notExp = (~op >> 52) & 0x7FFul;
+ ulong maxExp = 0x7FEul;
+
+ result = BitConverter.Int64BitsToDouble(
+ (long)((sign ? 1ul : 0ul) << 63 | (notExp == 0x7FFul ? maxExp : notExp) << 52));
+ }
+
+ return result;
+ }
+
+ public static double FPRSqrtEstimate(double value)
+ {
+ return FPRSqrtEstimateFpscr(value, false);
+ }
+
+ public static double FPRSqrtEstimateFpscr(double value, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr);
+ }
+ else if (sign)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(false);
+ }
+ else
+ {
+ ulong fraction = op & 0x000FFFFFFFFFFFFFul;
+ uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52);
+
+ if (exp == 0u)
+ {
+ while ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ exp -= 1u;
+ }
+
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+
+ uint scaled;
+
+ if ((exp & 1u) == 0u)
+ {
+ scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+ }
+ else
+ {
+ scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
+ }
+
+ uint resultExp = (3068u - exp) >> 1;
+
+ uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;
+
+ result = BitConverter.Int64BitsToDouble((long)((resultExp & 0x7FFul) << 52 | (estimate & 0xFFul) << 44));
+ }
+
+ return result;
+ }
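+
+        // Sketch of the exponent arithmetic: for input ~ 2^(exp-1023) * 1.f, the
+        // reciprocal square root has exponent ~ -(exp-1023)/2, and
+        //   resultExp - 1023 = ((3068 - exp) >> 1) - 1023 = (1022 - exp) / 2
+        // matches that up to the half step absorbed by the even/odd 'scaled' indexing.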
+
+ public static double FPHalvedSub(double value1, double value2, ExecutionContext context, FPCR fpcr)
+ {
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == sign2)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((inf1 && !sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == !sign2)
+ {
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = (value1 - value2) / 2.0;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPRSqrtStep(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.StandardFpcrValue;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ double product;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ product = FPZero(false);
+ }
+ else
+ {
+ product = FPMulFpscr(value1, value2, true);
+ }
+
+ result = FPHalvedSub(FPThree(false), product, context, fpcr);
+ }
+
+ return result;
+ }
+
+ public static double FPRSqrtStepFused(double value1, double value2)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value1 = value1.FPNeg();
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ result = FPOnePointFive(false);
+ }
+ else if (inf1 || inf2)
+ {
+ result = FPInfinity(sign1 ^ sign2);
+ }
+ else
+ {
+ result = Math.FusedMultiplyAdd(value1, value2, 3d) / 2d;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPSqrt(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = context.Fpcr;
+
+ value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context, fpcr);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FPType.Infinity && !sign)
+ {
+ result = FPInfinity(sign);
+ }
+ else if (sign)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else
+ {
+ result = Math.Sqrt(value);
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPSub(double value1, double value2)
+ {
+ return FPSubFpscr(value1, value2, false);
+ }
+
+ public static double FPSubFpscr(double value1, double value2, bool standardFpscr)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+ FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr;
+
+ value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr);
+ value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr);
+
+ double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr);
+
+ if (!done)
+ {
+ bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+ bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+ if (inf1 && inf2 && sign1 == sign2)
+ {
+ result = FPDefaultNaN();
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+ else if ((inf1 && !sign1) || (inf2 && sign2))
+ {
+ result = FPInfinity(false);
+ }
+ else if ((inf1 && sign1) || (inf2 && !sign2))
+ {
+ result = FPInfinity(true);
+ }
+ else if (zero1 && zero2 && sign1 == !sign2)
+ {
+ result = FPZero(sign1);
+ }
+ else
+ {
+ result = value1 - value2;
+
+ if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ result = FPZero(result < 0d);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public static double FPDefaultNaN()
+ {
+ return BitConverter.Int64BitsToDouble(0x7ff8000000000000);
+ }
+
+ public static double FPInfinity(bool sign)
+ {
+ return sign ? double.NegativeInfinity : double.PositiveInfinity;
+ }
+
+ public static double FPZero(bool sign)
+ {
+ return sign ? -0d : +0d;
+ }
+
+ public static double FPMaxNormal(bool sign)
+ {
+ return sign ? double.MinValue : double.MaxValue;
+ }
+
+ private static double FPTwo(bool sign)
+ {
+ return sign ? -2d : +2d;
+ }
+
+ private static double FPThree(bool sign)
+ {
+ return sign ? -3d : +3d;
+ }
+
+ private static double FPOnePointFive(bool sign)
+ {
+ return sign ? -1.5d : +1.5d;
+ }
+
+ private static double FPNeg(this double value)
+ {
+ return -value;
+ }
+
+ private static double ZerosOrOnes(bool ones)
+ {
+ return BitConverter.Int64BitsToDouble(ones ? -1L : 0L);
+ }
+
+ private static double FPUnpack(
+ this double value,
+ out FPType type,
+ out bool sign,
+ out ulong valueBits,
+ ExecutionContext context,
+ FPCR fpcr)
+ {
+ valueBits = (ulong)BitConverter.DoubleToInt64Bits(value);
+
+ sign = (~valueBits & 0x8000000000000000ul) == 0ul;
+
+ if ((valueBits & 0x7FF0000000000000ul) == 0ul)
+ {
+ if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (fpcr & FPCR.Fz) != 0)
+ {
+ type = FPType.Zero;
+ value = FPZero(sign);
+
+ if ((valueBits & 0x000FFFFFFFFFFFFFul) != 0ul)
+ {
+ SoftFloat.FPProcessException(FPException.InputDenorm, context, fpcr);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero;
+ }
+ }
+ else if ((~valueBits & 0x7FF0000000000000ul) == 0ul)
+ {
+ if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul)
+ {
+ type = FPType.Infinity;
+ }
+ else
+ {
+ type = (~valueBits & 0x0008000000000000ul) == 0ul ? FPType.QNaN : FPType.SNaN;
+ value = FPZero(sign);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero;
+ }
+
+ return value;
+ }
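+
+        // Unpacking notes (sketch): with FPCR.FZ set, input denormals are flushed to
+        // a signed zero and InputDenorm is signalled; for NaN inputs the returned
+        // double is zeroed because callers are expected to look at 'type' and the raw
+        // bits in 'valueBits' rather than the value itself.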
+
+ private static double FPProcessNaNs(
+ FPType type1,
+ FPType type2,
+ ulong op1,
+ ulong op2,
+ out bool done,
+ ExecutionContext context,
+ FPCR fpcr)
+ {
+ done = true;
+
+ if (type1 == FPType.SNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.SNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+ else if (type1 == FPType.QNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.QNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+
+ done = false;
+
+ return FPZero(false);
+ }
+
+ private static double FPProcessNaNs3(
+ FPType type1,
+ FPType type2,
+ FPType type3,
+ ulong op1,
+ ulong op2,
+ ulong op3,
+ out bool done,
+ ExecutionContext context,
+ FPCR fpcr)
+ {
+ done = true;
+
+ if (type1 == FPType.SNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.SNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+ else if (type3 == FPType.SNaN)
+ {
+ return FPProcessNaN(type3, op3, context, fpcr);
+ }
+ else if (type1 == FPType.QNaN)
+ {
+ return FPProcessNaN(type1, op1, context, fpcr);
+ }
+ else if (type2 == FPType.QNaN)
+ {
+ return FPProcessNaN(type2, op2, context, fpcr);
+ }
+ else if (type3 == FPType.QNaN)
+ {
+ return FPProcessNaN(type3, op3, context, fpcr);
+ }
+
+ done = false;
+
+ return FPZero(false);
+ }
+
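+        // NaN selection above follows the ARM pseudocode priority: signalling NaNs
+        // win over quiet NaNs, and earlier operands over later ones. FPProcessNaN
+        // quiets an SNaN by setting fraction bit 51 and signalling InvalidOp; with
+        // FPCR.DN set, any NaN result collapses to the default NaN.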
+ private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context, FPCR fpcr)
+ {
+ if (type == FPType.SNaN)
+ {
+ op |= 1ul << 51;
+
+ SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr);
+ }
+
+ if ((fpcr & FPCR.Dn) != 0)
+ {
+ return FPDefaultNaN();
+ }
+
+ return BitConverter.Int64BitsToDouble((long)op);
+ }
+ }
+}
diff --git a/src/ARMeilleure/IntermediateRepresentation/BasicBlock.cs b/src/ARMeilleure/IntermediateRepresentation/BasicBlock.cs
new file mode 100644
index 00000000..07bd8b67
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/BasicBlock.cs
@@ -0,0 +1,159 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ class BasicBlock : IEquatable<BasicBlock>, IIntrusiveListNode<BasicBlock>
+ {
+ private const uint MaxSuccessors = 2;
+
+ private int _succCount;
+ private BasicBlock _succ0;
+ private BasicBlock _succ1;
+ private HashSet<BasicBlock> _domFrontiers;
+
+ public int Index { get; set; }
+ public BasicBlockFrequency Frequency { get; set; }
+ public BasicBlock ListPrevious { get; set; }
+ public BasicBlock ListNext { get; set; }
+ public IntrusiveList<Operation> Operations { get; }
+ public List<BasicBlock> Predecessors { get; }
+ public BasicBlock ImmediateDominator { get; set; }
+
+ public int SuccessorsCount => _succCount;
+
+ public HashSet<BasicBlock> DominanceFrontiers
+ {
+ get
+ {
+ if (_domFrontiers == null)
+ {
+ _domFrontiers = new HashSet<BasicBlock>();
+ }
+
+ return _domFrontiers;
+ }
+ }
+
+ public BasicBlock() : this(index: -1) { }
+
+ public BasicBlock(int index)
+ {
+ Operations = new IntrusiveList<Operation>();
+ Predecessors = new List<BasicBlock>();
+
+ Index = index;
+ }
+
+ public void AddSuccessor(BasicBlock block)
+ {
+ ArgumentNullException.ThrowIfNull(block);
+
+ if ((uint)_succCount + 1 > MaxSuccessors)
+ {
+ ThrowSuccessorOverflow();
+ }
+
+ block.Predecessors.Add(this);
+
+ GetSuccessorUnsafe(_succCount++) = block;
+ }
+
+ public void RemoveSuccessor(int index)
+ {
+ if ((uint)index >= (uint)_succCount)
+ {
+ ThrowOutOfRange(nameof(index));
+ }
+
+ ref BasicBlock oldBlock = ref GetSuccessorUnsafe(index);
+
+ oldBlock.Predecessors.Remove(this);
+ oldBlock = null;
+
+ if (index == 0)
+ {
+ _succ0 = _succ1;
+ }
+
+ _succCount--;
+ }
+
+ public BasicBlock GetSuccessor(int index)
+ {
+ if ((uint)index >= (uint)_succCount)
+ {
+ ThrowOutOfRange(nameof(index));
+ }
+
+ return GetSuccessorUnsafe(index);
+ }
+
+ private ref BasicBlock GetSuccessorUnsafe(int index)
+ {
+ return ref Unsafe.Add(ref _succ0, index);
+ }
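+
+        // _succ0 and _succ1 act as a tiny inline array: Unsafe.Add(ref _succ0, index)
+        // selects between them by index. This assumes the runtime lays the two fields
+        // out consecutively, and avoids allocating a real array for at most two
+        // successors.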
+
+ public void SetSuccessor(int index, BasicBlock block)
+ {
+ ArgumentNullException.ThrowIfNull(block);
+
+ if ((uint)index >= (uint)_succCount)
+ {
+ ThrowOutOfRange(nameof(index));
+ }
+
+ ref BasicBlock oldBlock = ref GetSuccessorUnsafe(index);
+
+ oldBlock.Predecessors.Remove(this);
+ block.Predecessors.Add(this);
+
+ oldBlock = block;
+ }
+
+ public void Append(Operation node)
+ {
+ Operation last = Operations.Last;
+
+            // Append the node before the terminal operation, or at the end if the block has no terminal.
+ if (last == default)
+ {
+ Operations.AddLast(node);
+
+ return;
+ }
+
+ switch (last.Instruction)
+ {
+ case Instruction.Return:
+ case Instruction.Tailcall:
+ case Instruction.BranchIf:
+ Operations.AddBefore(last, node);
+ break;
+
+ default:
+ Operations.AddLast(node);
+ break;
+ }
+ }
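+
+        // Example: appending keeps the block terminator last, e.g. (sketch):
+        //   [..., BranchIf]  + Append(add) -> [..., add, BranchIf]
+        //   [..., Copy]      + Append(add) -> [..., Copy, add]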
+
+ private static void ThrowOutOfRange(string name) => throw new ArgumentOutOfRangeException(name);
+ private static void ThrowSuccessorOverflow() => throw new OverflowException($"BasicBlock can only have {MaxSuccessors} successors.");
+
+ public bool Equals(BasicBlock other)
+ {
+ return other == this;
+ }
+
+ public override bool Equals(object obj)
+ {
+ return Equals(obj as BasicBlock);
+ }
+
+ public override int GetHashCode()
+ {
+ return base.GetHashCode();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/IntermediateRepresentation/BasicBlockFrequency.cs b/src/ARMeilleure/IntermediateRepresentation/BasicBlockFrequency.cs
new file mode 100644
index 00000000..96cfee35
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/BasicBlockFrequency.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ enum BasicBlockFrequency
+ {
+ Default,
+ Cold
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/IntermediateRepresentation/Comparison.cs b/src/ARMeilleure/IntermediateRepresentation/Comparison.cs
new file mode 100644
index 00000000..628ce105
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/Comparison.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ enum Comparison
+ {
+ Equal = 0,
+ NotEqual = 1,
+ Greater = 2,
+ LessOrEqual = 3,
+ GreaterUI = 4,
+ LessOrEqualUI = 5,
+ GreaterOrEqual = 6,
+ Less = 7,
+ GreaterOrEqualUI = 8,
+ LessUI = 9
+ }
+
+ static class ComparisonExtensions
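+    // The values are laid out in complementary pairs so Invert can flip a
+    // comparison by toggling the lowest bit, e.g.:
+    //   Equal (0)   ^ 1 == NotEqual (1)
+    //   Greater (2) ^ 1 == LessOrEqual (3)
+    //   Less (7)    ^ 1 == GreaterOrEqual (6)
+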
+ {
+ public static Comparison Invert(this Comparison comp)
+ {
+ return (Comparison)((int)comp ^ 1);
+ }
+ }
+}
diff --git a/src/ARMeilleure/IntermediateRepresentation/IIntrusiveListNode.cs b/src/ARMeilleure/IntermediateRepresentation/IIntrusiveListNode.cs
new file mode 100644
index 00000000..caa9b83f
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/IIntrusiveListNode.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ interface IIntrusiveListNode<T>
+ {
+ T ListPrevious { get; set; }
+ T ListNext { get; set; }
+ }
+}
diff --git a/src/ARMeilleure/IntermediateRepresentation/Instruction.cs b/src/ARMeilleure/IntermediateRepresentation/Instruction.cs
new file mode 100644
index 00000000..b55fe1da
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/Instruction.cs
@@ -0,0 +1,72 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ enum Instruction : ushort
+ {
+ Add,
+ BitwiseAnd,
+ BitwiseExclusiveOr,
+ BitwiseNot,
+ BitwiseOr,
+ BranchIf,
+ ByteSwap,
+ Call,
+ Compare,
+ CompareAndSwap,
+ CompareAndSwap16,
+ CompareAndSwap8,
+ ConditionalSelect,
+ ConvertI64ToI32,
+ ConvertToFP,
+ ConvertToFPUI,
+ Copy,
+ CountLeadingZeros,
+ Divide,
+ DivideUI,
+ Load,
+ Load16,
+ Load8,
+ LoadArgument,
+ MemoryBarrier,
+ Multiply,
+ Multiply64HighSI,
+ Multiply64HighUI,
+ Negate,
+ Return,
+ RotateRight,
+ ShiftLeft,
+ ShiftRightSI,
+ ShiftRightUI,
+ SignExtend16,
+ SignExtend32,
+ SignExtend8,
+ StackAlloc,
+ Store,
+ Store16,
+ Store8,
+ Subtract,
+ Tailcall,
+ VectorCreateScalar,
+ VectorExtract,
+ VectorExtract16,
+ VectorExtract8,
+ VectorInsert,
+ VectorInsert16,
+ VectorInsert8,
+ VectorOne,
+ VectorZero,
+ VectorZeroUpper64,
+ VectorZeroUpper96,
+ ZeroExtend16,
+ ZeroExtend32,
+ ZeroExtend8,
+
+ Clobber,
+ Extended,
+ Fill,
+ LoadFromContext,
+ Phi,
+ Spill,
+ SpillArg,
+ StoreToContext
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/src/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
new file mode 100644
index 00000000..f5a776fa
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
@@ -0,0 +1,636 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ enum Intrinsic : ushort
+ {
+ // X86 (SSE and AVX)
+
+ X86Addpd,
+ X86Addps,
+ X86Addsd,
+ X86Addss,
+ X86Aesdec,
+ X86Aesdeclast,
+ X86Aesenc,
+ X86Aesenclast,
+ X86Aesimc,
+ X86Andnpd,
+ X86Andnps,
+ X86Andpd,
+ X86Andps,
+ X86Blendvpd,
+ X86Blendvps,
+ X86Cmppd,
+ X86Cmpps,
+ X86Cmpsd,
+ X86Cmpss,
+ X86Comisdeq,
+ X86Comisdge,
+ X86Comisdlt,
+ X86Comisseq,
+ X86Comissge,
+ X86Comisslt,
+ X86Crc32,
+ X86Crc32_16,
+ X86Crc32_8,
+ X86Cvtdq2pd,
+ X86Cvtdq2ps,
+ X86Cvtpd2dq,
+ X86Cvtpd2ps,
+ X86Cvtps2dq,
+ X86Cvtps2pd,
+ X86Cvtsd2si,
+ X86Cvtsd2ss,
+ X86Cvtsi2sd,
+ X86Cvtsi2si,
+ X86Cvtsi2ss,
+ X86Cvtss2sd,
+ X86Cvtss2si,
+ X86Divpd,
+ X86Divps,
+ X86Divsd,
+ X86Divss,
+ X86Gf2p8affineqb,
+ X86Haddpd,
+ X86Haddps,
+ X86Insertps,
+ X86Ldmxcsr,
+ X86Maxpd,
+ X86Maxps,
+ X86Maxsd,
+ X86Maxss,
+ X86Minpd,
+ X86Minps,
+ X86Minsd,
+ X86Minss,
+ X86Movhlps,
+ X86Movlhps,
+ X86Movss,
+ X86Mulpd,
+ X86Mulps,
+ X86Mulsd,
+ X86Mulss,
+ X86Paddb,
+ X86Paddd,
+ X86Paddq,
+ X86Paddw,
+ X86Palignr,
+ X86Pand,
+ X86Pandn,
+ X86Pavgb,
+ X86Pavgw,
+ X86Pblendvb,
+ X86Pclmulqdq,
+ X86Pcmpeqb,
+ X86Pcmpeqd,
+ X86Pcmpeqq,
+ X86Pcmpeqw,
+ X86Pcmpgtb,
+ X86Pcmpgtd,
+ X86Pcmpgtq,
+ X86Pcmpgtw,
+ X86Pmaxsb,
+ X86Pmaxsd,
+ X86Pmaxsw,
+ X86Pmaxub,
+ X86Pmaxud,
+ X86Pmaxuw,
+ X86Pminsb,
+ X86Pminsd,
+ X86Pminsw,
+ X86Pminub,
+ X86Pminud,
+ X86Pminuw,
+ X86Pmovsxbw,
+ X86Pmovsxdq,
+ X86Pmovsxwd,
+ X86Pmovzxbw,
+ X86Pmovzxdq,
+ X86Pmovzxwd,
+ X86Pmulld,
+ X86Pmullw,
+ X86Popcnt,
+ X86Por,
+ X86Pshufb,
+ X86Pshufd,
+ X86Pslld,
+ X86Pslldq,
+ X86Psllq,
+ X86Psllw,
+ X86Psrad,
+ X86Psraw,
+ X86Psrld,
+ X86Psrlq,
+ X86Psrldq,
+ X86Psrlw,
+ X86Psubb,
+ X86Psubd,
+ X86Psubq,
+ X86Psubw,
+ X86Punpckhbw,
+ X86Punpckhdq,
+ X86Punpckhqdq,
+ X86Punpckhwd,
+ X86Punpcklbw,
+ X86Punpckldq,
+ X86Punpcklqdq,
+ X86Punpcklwd,
+ X86Pxor,
+ X86Rcpps,
+ X86Rcpss,
+ X86Roundpd,
+ X86Roundps,
+ X86Roundsd,
+ X86Roundss,
+ X86Rsqrtps,
+ X86Rsqrtss,
+ X86Sha256Msg1,
+ X86Sha256Msg2,
+ X86Sha256Rnds2,
+ X86Shufpd,
+ X86Shufps,
+ X86Sqrtpd,
+ X86Sqrtps,
+ X86Sqrtsd,
+ X86Sqrtss,
+ X86Stmxcsr,
+ X86Subpd,
+ X86Subps,
+ X86Subsd,
+ X86Subss,
+ X86Unpckhpd,
+ X86Unpckhps,
+ X86Unpcklpd,
+ X86Unpcklps,
+ X86Vcvtph2ps,
+ X86Vcvtps2ph,
+ X86Vfmadd231pd,
+ X86Vfmadd231ps,
+ X86Vfmadd231sd,
+ X86Vfmadd231ss,
+ X86Vfmsub231sd,
+ X86Vfmsub231ss,
+ X86Vfnmadd231pd,
+ X86Vfnmadd231ps,
+ X86Vfnmadd231sd,
+ X86Vfnmadd231ss,
+ X86Vfnmsub231sd,
+ X86Vfnmsub231ss,
+ X86Vpternlogd,
+ X86Xorpd,
+ X86Xorps,
+
+ // Arm64 (FP and Advanced SIMD)
+
+ Arm64AbsS,
+ Arm64AbsV,
+ Arm64AddhnV,
+ Arm64AddpS,
+ Arm64AddpV,
+ Arm64AddvV,
+ Arm64AddS,
+ Arm64AddV,
+ Arm64AesdV,
+ Arm64AeseV,
+ Arm64AesimcV,
+ Arm64AesmcV,
+ Arm64AndV,
+ Arm64BicVi,
+ Arm64BicV,
+ Arm64BifV,
+ Arm64BitV,
+ Arm64BslV,
+ Arm64ClsV,
+ Arm64ClzV,
+ Arm64CmeqS,
+ Arm64CmeqV,
+ Arm64CmeqSz,
+ Arm64CmeqVz,
+ Arm64CmgeS,
+ Arm64CmgeV,
+ Arm64CmgeSz,
+ Arm64CmgeVz,
+ Arm64CmgtS,
+ Arm64CmgtV,
+ Arm64CmgtSz,
+ Arm64CmgtVz,
+ Arm64CmhiS,
+ Arm64CmhiV,
+ Arm64CmhsS,
+ Arm64CmhsV,
+ Arm64CmleSz,
+ Arm64CmleVz,
+ Arm64CmltSz,
+ Arm64CmltVz,
+ Arm64CmtstS,
+ Arm64CmtstV,
+ Arm64CntV,
+ Arm64DupSe,
+ Arm64DupVe,
+ Arm64DupGp,
+ Arm64EorV,
+ Arm64ExtV,
+ Arm64FabdS,
+ Arm64FabdV,
+ Arm64FabsV,
+ Arm64FabsS,
+ Arm64FacgeS,
+ Arm64FacgeV,
+ Arm64FacgtS,
+ Arm64FacgtV,
+ Arm64FaddpS,
+ Arm64FaddpV,
+ Arm64FaddV,
+ Arm64FaddS,
+ Arm64FccmpeS,
+ Arm64FccmpS,
+ Arm64FcmeqS,
+ Arm64FcmeqV,
+ Arm64FcmeqSz,
+ Arm64FcmeqVz,
+ Arm64FcmgeS,
+ Arm64FcmgeV,
+ Arm64FcmgeSz,
+ Arm64FcmgeVz,
+ Arm64FcmgtS,
+ Arm64FcmgtV,
+ Arm64FcmgtSz,
+ Arm64FcmgtVz,
+ Arm64FcmleSz,
+ Arm64FcmleVz,
+ Arm64FcmltSz,
+ Arm64FcmltVz,
+ Arm64FcmpeS,
+ Arm64FcmpS,
+ Arm64FcselS,
+ Arm64FcvtasS,
+ Arm64FcvtasV,
+ Arm64FcvtasGp,
+ Arm64FcvtauS,
+ Arm64FcvtauV,
+ Arm64FcvtauGp,
+ Arm64FcvtlV,
+ Arm64FcvtmsS,
+ Arm64FcvtmsV,
+ Arm64FcvtmsGp,
+ Arm64FcvtmuS,
+ Arm64FcvtmuV,
+ Arm64FcvtmuGp,
+ Arm64FcvtnsS,
+ Arm64FcvtnsV,
+ Arm64FcvtnsGp,
+ Arm64FcvtnuS,
+ Arm64FcvtnuV,
+ Arm64FcvtnuGp,
+ Arm64FcvtnV,
+ Arm64FcvtpsS,
+ Arm64FcvtpsV,
+ Arm64FcvtpsGp,
+ Arm64FcvtpuS,
+ Arm64FcvtpuV,
+ Arm64FcvtpuGp,
+ Arm64FcvtxnS,
+ Arm64FcvtxnV,
+ Arm64FcvtzsSFixed,
+ Arm64FcvtzsVFixed,
+ Arm64FcvtzsS,
+ Arm64FcvtzsV,
+ Arm64FcvtzsGpFixed,
+ Arm64FcvtzsGp,
+ Arm64FcvtzuSFixed,
+ Arm64FcvtzuVFixed,
+ Arm64FcvtzuS,
+ Arm64FcvtzuV,
+ Arm64FcvtzuGpFixed,
+ Arm64FcvtzuGp,
+ Arm64FcvtS,
+ Arm64FdivV,
+ Arm64FdivS,
+ Arm64FmaddS,
+ Arm64FmaxnmpS,
+ Arm64FmaxnmpV,
+ Arm64FmaxnmvV,
+ Arm64FmaxnmV,
+ Arm64FmaxnmS,
+ Arm64FmaxpS,
+ Arm64FmaxpV,
+ Arm64FmaxvV,
+ Arm64FmaxV,
+ Arm64FmaxS,
+ Arm64FminnmpS,
+ Arm64FminnmpV,
+ Arm64FminnmvV,
+ Arm64FminnmV,
+ Arm64FminnmS,
+ Arm64FminpS,
+ Arm64FminpV,
+ Arm64FminvV,
+ Arm64FminV,
+ Arm64FminS,
+ Arm64FmlaSe,
+ Arm64FmlaVe,
+ Arm64FmlaV,
+ Arm64FmlsSe,
+ Arm64FmlsVe,
+ Arm64FmlsV,
+ Arm64FmovVi,
+ Arm64FmovS,
+ Arm64FmovGp,
+ Arm64FmovSi,
+ Arm64FmsubS,
+ Arm64FmulxSe,
+ Arm64FmulxVe,
+ Arm64FmulxS,
+ Arm64FmulxV,
+ Arm64FmulSe,
+ Arm64FmulVe,
+ Arm64FmulV,
+ Arm64FmulS,
+ Arm64FnegV,
+ Arm64FnegS,
+ Arm64FnmaddS,
+ Arm64FnmsubS,
+ Arm64FnmulS,
+ Arm64FrecpeS,
+ Arm64FrecpeV,
+ Arm64FrecpsS,
+ Arm64FrecpsV,
+ Arm64FrecpxS,
+ Arm64FrintaV,
+ Arm64FrintaS,
+ Arm64FrintiV,
+ Arm64FrintiS,
+ Arm64FrintmV,
+ Arm64FrintmS,
+ Arm64FrintnV,
+ Arm64FrintnS,
+ Arm64FrintpV,
+ Arm64FrintpS,
+ Arm64FrintxV,
+ Arm64FrintxS,
+ Arm64FrintzV,
+ Arm64FrintzS,
+ Arm64FrsqrteS,
+ Arm64FrsqrteV,
+ Arm64FrsqrtsS,
+ Arm64FrsqrtsV,
+ Arm64FsqrtV,
+ Arm64FsqrtS,
+ Arm64FsubV,
+ Arm64FsubS,
+ Arm64InsVe,
+ Arm64InsGp,
+ Arm64Ld1rV,
+ Arm64Ld1Vms,
+ Arm64Ld1Vss,
+ Arm64Ld2rV,
+ Arm64Ld2Vms,
+ Arm64Ld2Vss,
+ Arm64Ld3rV,
+ Arm64Ld3Vms,
+ Arm64Ld3Vss,
+ Arm64Ld4rV,
+ Arm64Ld4Vms,
+ Arm64Ld4Vss,
+ Arm64MlaVe,
+ Arm64MlaV,
+ Arm64MlsVe,
+ Arm64MlsV,
+ Arm64MoviV,
+ Arm64MrsFpcr,
+ Arm64MsrFpcr,
+ Arm64MrsFpsr,
+ Arm64MsrFpsr,
+ Arm64MulVe,
+ Arm64MulV,
+ Arm64MvniV,
+ Arm64NegS,
+ Arm64NegV,
+ Arm64NotV,
+ Arm64OrnV,
+ Arm64OrrVi,
+ Arm64OrrV,
+ Arm64PmullV,
+ Arm64PmulV,
+ Arm64RaddhnV,
+ Arm64RbitV,
+ Arm64Rev16V,
+ Arm64Rev32V,
+ Arm64Rev64V,
+ Arm64RshrnV,
+ Arm64RsubhnV,
+ Arm64SabalV,
+ Arm64SabaV,
+ Arm64SabdlV,
+ Arm64SabdV,
+ Arm64SadalpV,
+ Arm64SaddlpV,
+ Arm64SaddlvV,
+ Arm64SaddlV,
+ Arm64SaddwV,
+ Arm64ScvtfSFixed,
+ Arm64ScvtfVFixed,
+ Arm64ScvtfS,
+ Arm64ScvtfV,
+ Arm64ScvtfGpFixed,
+ Arm64ScvtfGp,
+ Arm64Sha1cV,
+ Arm64Sha1hV,
+ Arm64Sha1mV,
+ Arm64Sha1pV,
+ Arm64Sha1su0V,
+ Arm64Sha1su1V,
+ Arm64Sha256h2V,
+ Arm64Sha256hV,
+ Arm64Sha256su0V,
+ Arm64Sha256su1V,
+ Arm64ShaddV,
+ Arm64ShllV,
+ Arm64ShlS,
+ Arm64ShlV,
+ Arm64ShrnV,
+ Arm64ShsubV,
+ Arm64SliS,
+ Arm64SliV,
+ Arm64SmaxpV,
+ Arm64SmaxvV,
+ Arm64SmaxV,
+ Arm64SminpV,
+ Arm64SminvV,
+ Arm64SminV,
+ Arm64SmlalVe,
+ Arm64SmlalV,
+ Arm64SmlslVe,
+ Arm64SmlslV,
+ Arm64SmovV,
+ Arm64SmullVe,
+ Arm64SmullV,
+ Arm64SqabsS,
+ Arm64SqabsV,
+ Arm64SqaddS,
+ Arm64SqaddV,
+ Arm64SqdmlalSe,
+ Arm64SqdmlalVe,
+ Arm64SqdmlalS,
+ Arm64SqdmlalV,
+ Arm64SqdmlslSe,
+ Arm64SqdmlslVe,
+ Arm64SqdmlslS,
+ Arm64SqdmlslV,
+ Arm64SqdmulhSe,
+ Arm64SqdmulhVe,
+ Arm64SqdmulhS,
+ Arm64SqdmulhV,
+ Arm64SqdmullSe,
+ Arm64SqdmullVe,
+ Arm64SqdmullS,
+ Arm64SqdmullV,
+ Arm64SqnegS,
+ Arm64SqnegV,
+ Arm64SqrdmulhSe,
+ Arm64SqrdmulhVe,
+ Arm64SqrdmulhS,
+ Arm64SqrdmulhV,
+ Arm64SqrshlS,
+ Arm64SqrshlV,
+ Arm64SqrshrnS,
+ Arm64SqrshrnV,
+ Arm64SqrshrunS,
+ Arm64SqrshrunV,
+ Arm64SqshluS,
+ Arm64SqshluV,
+ Arm64SqshlSi,
+ Arm64SqshlVi,
+ Arm64SqshlS,
+ Arm64SqshlV,
+ Arm64SqshrnS,
+ Arm64SqshrnV,
+ Arm64SqshrunS,
+ Arm64SqshrunV,
+ Arm64SqsubS,
+ Arm64SqsubV,
+ Arm64SqxtnS,
+ Arm64SqxtnV,
+ Arm64SqxtunS,
+ Arm64SqxtunV,
+ Arm64SrhaddV,
+ Arm64SriS,
+ Arm64SriV,
+ Arm64SrshlS,
+ Arm64SrshlV,
+ Arm64SrshrS,
+ Arm64SrshrV,
+ Arm64SrsraS,
+ Arm64SrsraV,
+ Arm64SshllV,
+ Arm64SshlS,
+ Arm64SshlV,
+ Arm64SshrS,
+ Arm64SshrV,
+ Arm64SsraS,
+ Arm64SsraV,
+ Arm64SsublV,
+ Arm64SsubwV,
+ Arm64St1Vms,
+ Arm64St1Vss,
+ Arm64St2Vms,
+ Arm64St2Vss,
+ Arm64St3Vms,
+ Arm64St3Vss,
+ Arm64St4Vms,
+ Arm64St4Vss,
+ Arm64SubhnV,
+ Arm64SubS,
+ Arm64SubV,
+ Arm64SuqaddS,
+ Arm64SuqaddV,
+ Arm64TblV,
+ Arm64TbxV,
+ Arm64Trn1V,
+ Arm64Trn2V,
+ Arm64UabalV,
+ Arm64UabaV,
+ Arm64UabdlV,
+ Arm64UabdV,
+ Arm64UadalpV,
+ Arm64UaddlpV,
+ Arm64UaddlvV,
+ Arm64UaddlV,
+ Arm64UaddwV,
+ Arm64UcvtfSFixed,
+ Arm64UcvtfVFixed,
+ Arm64UcvtfS,
+ Arm64UcvtfV,
+ Arm64UcvtfGpFixed,
+ Arm64UcvtfGp,
+ Arm64UhaddV,
+ Arm64UhsubV,
+ Arm64UmaxpV,
+ Arm64UmaxvV,
+ Arm64UmaxV,
+ Arm64UminpV,
+ Arm64UminvV,
+ Arm64UminV,
+ Arm64UmlalVe,
+ Arm64UmlalV,
+ Arm64UmlslVe,
+ Arm64UmlslV,
+ Arm64UmovV,
+ Arm64UmullVe,
+ Arm64UmullV,
+ Arm64UqaddS,
+ Arm64UqaddV,
+ Arm64UqrshlS,
+ Arm64UqrshlV,
+ Arm64UqrshrnS,
+ Arm64UqrshrnV,
+ Arm64UqshlSi,
+ Arm64UqshlVi,
+ Arm64UqshlS,
+ Arm64UqshlV,
+ Arm64UqshrnS,
+ Arm64UqshrnV,
+ Arm64UqsubS,
+ Arm64UqsubV,
+ Arm64UqxtnS,
+ Arm64UqxtnV,
+ Arm64UrecpeV,
+ Arm64UrhaddV,
+ Arm64UrshlS,
+ Arm64UrshlV,
+ Arm64UrshrS,
+ Arm64UrshrV,
+ Arm64UrsqrteV,
+ Arm64UrsraS,
+ Arm64UrsraV,
+ Arm64UshllV,
+ Arm64UshlS,
+ Arm64UshlV,
+ Arm64UshrS,
+ Arm64UshrV,
+ Arm64UsqaddS,
+ Arm64UsqaddV,
+ Arm64UsraS,
+ Arm64UsraV,
+ Arm64UsublV,
+ Arm64UsubwV,
+ Arm64Uzp1V,
+ Arm64Uzp2V,
+ Arm64XtnV,
+ Arm64Zip1V,
+ Arm64Zip2V,
+
+ Arm64VTypeShift = 13,
+ Arm64VTypeMask = 1 << Arm64VTypeShift,
+ Arm64V64 = 0 << Arm64VTypeShift,
+ Arm64V128 = 1 << Arm64VTypeShift,
+
+ Arm64VSizeShift = 14,
+ Arm64VSizeMask = 3 << Arm64VSizeShift,
+ Arm64VFloat = 0 << Arm64VSizeShift,
+ Arm64VDouble = 1 << Arm64VSizeShift,
+ Arm64VByte = 0 << Arm64VSizeShift,
+ Arm64VHWord = 1 << Arm64VSizeShift,
+ Arm64VWord = 2 << Arm64VSizeShift,
+ Arm64VDWord = 3 << Arm64VSizeShift
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/IntermediateRepresentation/IntrusiveList.cs b/src/ARMeilleure/IntermediateRepresentation/IntrusiveList.cs
new file mode 100644
index 00000000..184df87c
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/IntrusiveList.cs
@@ -0,0 +1,208 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ /// <summary>
+    /// Represents an efficient linked list that stores the links directly on the items and does not allocate.
+ /// </summary>
+ /// <typeparam name="T">Type of the list items</typeparam>
+ class IntrusiveList<T> where T : IEquatable<T>, IIntrusiveListNode<T>
+ {
+ /// <summary>
+ /// First item of the list, or null if empty.
+ /// </summary>
+ public T First { get; private set; }
+
+ /// <summary>
+ /// Last item of the list, or null if empty.
+ /// </summary>
+ public T Last { get; private set; }
+
+ /// <summary>
+ /// Total number of items on the list.
+ /// </summary>
+ public int Count { get; private set; }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="IntrusiveList{T}"/> class.
+ /// </summary>
+ /// <exception cref="ArgumentException"><typeparamref name="T"/> is not pointer sized.</exception>
+ public IntrusiveList()
+ {
+ if (Unsafe.SizeOf<T>() != IntPtr.Size)
+ {
+ throw new ArgumentException("T must be a reference type or a pointer sized struct.");
+ }
+ }
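+
+        // Illustrative usage (op0/op1 stand in for any nodes implementing
+        // IIntrusiveListNode<T>, e.g. Operation):
+        //   var list = new IntrusiveList<Operation>();
+        //   list.AddLast(op1);          // [op1]
+        //   list.AddBefore(op1, op0);   // [op0, op1]
+        //   list.Remove(op0);           // [op1]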
+
+ /// <summary>
+        /// Adds an item as the first item of the list.
+ /// </summary>
+ /// <param name="newNode">Item to be added</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public T AddFirst(T newNode)
+ {
+ if (!EqualsNull(First))
+ {
+ return AddBefore(First, newNode);
+ }
+ else
+ {
+ Debug.Assert(EqualsNull(newNode.ListPrevious));
+ Debug.Assert(EqualsNull(newNode.ListNext));
+ Debug.Assert(EqualsNull(Last));
+
+ First = newNode;
+ Last = newNode;
+
+ Debug.Assert(Count == 0);
+
+ Count = 1;
+
+ return newNode;
+ }
+ }
+
+ /// <summary>
+        /// Adds an item as the last item of the list.
+ /// </summary>
+ /// <param name="newNode">Item to be added</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public T AddLast(T newNode)
+ {
+ if (!EqualsNull(Last))
+ {
+ return AddAfter(Last, newNode);
+ }
+ else
+ {
+ Debug.Assert(EqualsNull(newNode.ListPrevious));
+ Debug.Assert(EqualsNull(newNode.ListNext));
+ Debug.Assert(EqualsNull(First));
+
+ First = newNode;
+ Last = newNode;
+
+ Debug.Assert(Count == 0);
+
+ Count = 1;
+
+ return newNode;
+ }
+ }
+
+ /// <summary>
+        /// Adds an item before an existing item in the list.
+        /// </summary>
+        /// <param name="node">Item in the list that will succeed the new item</param>
+ /// <param name="newNode">Item to be added</param>
+ /// <returns>New item</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public T AddBefore(T node, T newNode)
+ {
+ Debug.Assert(EqualsNull(newNode.ListPrevious));
+ Debug.Assert(EqualsNull(newNode.ListNext));
+
+ newNode.ListPrevious = node.ListPrevious;
+ newNode.ListNext = node;
+
+ node.ListPrevious = newNode;
+
+ if (!EqualsNull(newNode.ListPrevious))
+ {
+ newNode.ListPrevious.ListNext = newNode;
+ }
+
+ if (Equals(First, node))
+ {
+ First = newNode;
+ }
+
+ Count++;
+
+ return newNode;
+ }
+
+ /// <summary>
+        /// Adds an item after an existing item in the list.
+        /// </summary>
+        /// <param name="node">Item in the list that will precede the new item</param>
+ /// <param name="newNode">Item to be added</param>
+ /// <returns>New item</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public T AddAfter(T node, T newNode)
+ {
+ Debug.Assert(EqualsNull(newNode.ListPrevious));
+ Debug.Assert(EqualsNull(newNode.ListNext));
+
+ newNode.ListPrevious = node;
+ newNode.ListNext = node.ListNext;
+
+ node.ListNext = newNode;
+
+ if (!EqualsNull(newNode.ListNext))
+ {
+ newNode.ListNext.ListPrevious = newNode;
+ }
+
+ if (Equals(Last, node))
+ {
+ Last = newNode;
+ }
+
+ Count++;
+
+ return newNode;
+ }
+
+ /// <summary>
+        /// Removes an item from the list.
+ /// </summary>
+ /// <param name="node">The item to be removed</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void Remove(T node)
+ {
+ if (!EqualsNull(node.ListPrevious))
+ {
+ node.ListPrevious.ListNext = node.ListNext;
+ }
+ else
+ {
+ Debug.Assert(Equals(First, node));
+
+ First = node.ListNext;
+ }
+
+ if (!EqualsNull(node.ListNext))
+ {
+ node.ListNext.ListPrevious = node.ListPrevious;
+ }
+ else
+ {
+ Debug.Assert(Equals(Last, node));
+
+ Last = node.ListPrevious;
+ }
+
+ node.ListPrevious = default;
+ node.ListNext = default;
+
+ Count--;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static bool EqualsNull(T a)
+ {
+ return EqualityComparer<T>.Default.Equals(a, default);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static bool Equals(T a, T b)
+ {
+ return EqualityComparer<T>.Default.Equals(a, b);
+ }
+ }
+}
diff --git a/src/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs b/src/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs
new file mode 100644
index 00000000..07d2633b
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs
@@ -0,0 +1,54 @@
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ unsafe struct MemoryOperand
+ {
+ private struct Data
+ {
+#pragma warning disable CS0649
+ public byte Kind;
+ public byte Type;
+#pragma warning restore CS0649
+ public byte Scale;
+ public Operand BaseAddress;
+ public Operand Index;
+ public int Displacement;
+ }
+
+ private Data* _data;
+
+ public MemoryOperand(Operand operand)
+ {
+ Debug.Assert(operand.Kind == OperandKind.Memory);
+
+ _data = (Data*)Unsafe.As<Operand, IntPtr>(ref operand);
+ }
+
+ public Operand BaseAddress
+ {
+ get => _data->BaseAddress;
+ set => _data->BaseAddress = value;
+ }
+
+ public Operand Index
+ {
+ get => _data->Index;
+ set => _data->Index = value;
+ }
+
+ public Multiplier Scale
+ {
+ get => (Multiplier)_data->Scale;
+ set => _data->Scale = (byte)value;
+ }
+
+ public int Displacement
+ {
+ get => _data->Displacement;
+ set => _data->Displacement = value;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/IntermediateRepresentation/Multiplier.cs b/src/ARMeilleure/IntermediateRepresentation/Multiplier.cs
new file mode 100644
index 00000000..d6bc7d99
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/Multiplier.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ enum Multiplier
+ {
+ x1 = 0,
+ x2 = 1,
+ x4 = 2,
+ x8 = 3,
+ x16 = 4
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/IntermediateRepresentation/Operand.cs b/src/ARMeilleure/IntermediateRepresentation/Operand.cs
new file mode 100644
index 00000000..9e8de3ba
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/Operand.cs
@@ -0,0 +1,594 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.Common;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ unsafe struct Operand : IEquatable<Operand>
+ {
+ internal struct Data
+ {
+ public byte Kind;
+ public byte Type;
+ public byte SymbolType;
+ public byte Padding; // Unused space.
+ public ushort AssignmentsCount;
+ public ushort AssignmentsCapacity;
+ public uint UsesCount;
+ public uint UsesCapacity;
+ public Operation* Assignments;
+ public Operation* Uses;
+ public ulong Value;
+ public ulong SymbolValue;
+ }
+
+ private Data* _data;
+
+ public OperandKind Kind
+ {
+ get => (OperandKind)_data->Kind;
+ private set => _data->Kind = (byte)value;
+ }
+
+ public OperandType Type
+ {
+ get => (OperandType)_data->Type;
+ private set => _data->Type = (byte)value;
+ }
+
+ public ulong Value
+ {
+ get => _data->Value;
+ private set => _data->Value = value;
+ }
+
+ public Symbol Symbol
+ {
+ get
+ {
+ Debug.Assert(Kind != OperandKind.Memory);
+
+ return new Symbol((SymbolType)_data->SymbolType, _data->SymbolValue);
+ }
+ private set
+ {
+ Debug.Assert(Kind != OperandKind.Memory);
+
+ if (value.Type == SymbolType.None)
+ {
+ _data->SymbolType = (byte)SymbolType.None;
+ }
+ else
+ {
+ _data->SymbolType = (byte)value.Type;
+ _data->SymbolValue = value.Value;
+ }
+ }
+ }
+
+ public ReadOnlySpan<Operation> Assignments
+ {
+ get
+ {
+ Debug.Assert(Kind != OperandKind.Memory);
+
+ return new ReadOnlySpan<Operation>(_data->Assignments, _data->AssignmentsCount);
+ }
+ }
+
+ public ReadOnlySpan<Operation> Uses
+ {
+ get
+ {
+ Debug.Assert(Kind != OperandKind.Memory);
+
+ return new ReadOnlySpan<Operation>(_data->Uses, (int)_data->UsesCount);
+ }
+ }
+
+ public int UsesCount => (int)_data->UsesCount;
+ public int AssignmentsCount => _data->AssignmentsCount;
+
+ public bool Relocatable => Symbol.Type != SymbolType.None;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public Register GetRegister()
+ {
+ Debug.Assert(Kind == OperandKind.Register);
+
+ return new Register((int)Value & 0xffffff, (RegisterType)(Value >> 24));
+ }
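+
+        // Register operands pack the register index into the low 24 bits of Value and
+        // the RegisterType into the bits above, mirroring Factory.Register below:
+        //   Value = (ulong)((int)regType << 24 | index)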
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public MemoryOperand GetMemory()
+ {
+ Debug.Assert(Kind == OperandKind.Memory);
+
+ return new MemoryOperand(this);
+ }
+
+ public int GetLocalNumber()
+ {
+ Debug.Assert(Kind == OperandKind.LocalVariable);
+
+ return (int)Value;
+ }
+
+ public byte AsByte()
+ {
+ return (byte)Value;
+ }
+
+ public short AsInt16()
+ {
+ return (short)Value;
+ }
+
+ public int AsInt32()
+ {
+ return (int)Value;
+ }
+
+ public long AsInt64()
+ {
+ return (long)Value;
+ }
+
+ public float AsFloat()
+ {
+ return BitConverter.Int32BitsToSingle((int)Value);
+ }
+
+ public double AsDouble()
+ {
+ return BitConverter.Int64BitsToDouble((long)Value);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal ref ulong GetValueUnsafe()
+ {
+ return ref _data->Value;
+ }
+
+ internal void NumberLocal(int number)
+ {
+ if (Kind != OperandKind.LocalVariable)
+ {
+ throw new InvalidOperationException("The operand is not a local variable.");
+ }
+
+ Value = (ulong)number;
+ }
+
+ public void AddAssignment(Operation operation)
+ {
+ if (Kind == OperandKind.LocalVariable)
+ {
+ Add(operation, ref _data->Assignments, ref _data->AssignmentsCount, ref _data->AssignmentsCapacity);
+ }
+ else if (Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = GetMemory();
+ Operand addr = memOp.BaseAddress;
+ Operand index = memOp.Index;
+
+ if (addr != default)
+ {
+ Add(operation, ref addr._data->Assignments, ref addr._data->AssignmentsCount, ref addr._data->AssignmentsCapacity);
+ }
+
+ if (index != default)
+ {
+ Add(operation, ref index._data->Assignments, ref index._data->AssignmentsCount, ref index._data->AssignmentsCapacity);
+ }
+ }
+ }
+
+ public void RemoveAssignment(Operation operation)
+ {
+ if (Kind == OperandKind.LocalVariable)
+ {
+ Remove(operation, ref _data->Assignments, ref _data->AssignmentsCount);
+ }
+ else if (Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = GetMemory();
+ Operand addr = memOp.BaseAddress;
+ Operand index = memOp.Index;
+
+ if (addr != default)
+ {
+ Remove(operation, ref addr._data->Assignments, ref addr._data->AssignmentsCount);
+ }
+
+ if (index != default)
+ {
+ Remove(operation, ref index._data->Assignments, ref index._data->AssignmentsCount);
+ }
+ }
+ }
+
+ public void AddUse(Operation operation)
+ {
+ if (Kind == OperandKind.LocalVariable)
+ {
+ Add(operation, ref _data->Uses, ref _data->UsesCount, ref _data->UsesCapacity);
+ }
+ else if (Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = GetMemory();
+ Operand addr = memOp.BaseAddress;
+ Operand index = memOp.Index;
+
+ if (addr != default)
+ {
+ Add(operation, ref addr._data->Uses, ref addr._data->UsesCount, ref addr._data->UsesCapacity);
+ }
+
+ if (index != default)
+ {
+ Add(operation, ref index._data->Uses, ref index._data->UsesCount, ref index._data->UsesCapacity);
+ }
+ }
+ }
+
+ public void RemoveUse(Operation operation)
+ {
+ if (Kind == OperandKind.LocalVariable)
+ {
+ Remove(operation, ref _data->Uses, ref _data->UsesCount);
+ }
+ else if (Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = GetMemory();
+ Operand addr = memOp.BaseAddress;
+ Operand index = memOp.Index;
+
+ if (addr != default)
+ {
+ Remove(operation, ref addr._data->Uses, ref addr._data->UsesCount);
+ }
+
+ if (index != default)
+ {
+ Remove(operation, ref index._data->Uses, ref index._data->UsesCount);
+ }
+ }
+ }
+
+ public Span<Operation> GetUses(ref Span<Operation> buffer)
+ {
+ ReadOnlySpan<Operation> uses = Uses;
+
+ if (buffer.Length < uses.Length)
+ {
+ buffer = Allocators.Default.AllocateSpan<Operation>((uint)uses.Length);
+ }
+
+ uses.CopyTo(buffer);
+
+ return buffer.Slice(0, uses.Length);
+ }
+
+ private static void New<T>(ref T* data, ref ushort count, ref ushort capacity, ushort initialCapacity) where T : unmanaged
+ {
+ count = 0;
+ capacity = initialCapacity;
+ data = Allocators.References.Allocate<T>(initialCapacity);
+ }
+
+ private static void New<T>(ref T* data, ref uint count, ref uint capacity, uint initialCapacity) where T : unmanaged
+ {
+ count = 0;
+ capacity = initialCapacity;
+ data = Allocators.References.Allocate<T>(initialCapacity);
+ }
+
+ private static void Add<T>(T item, ref T* data, ref ushort count, ref ushort capacity) where T : unmanaged
+ {
+ if (count < capacity)
+ {
+ data[(uint)count++] = item;
+
+ return;
+ }
+
+            // Could not add the item on the fast path; fall back to the slow path.
+ ExpandAdd(item, ref data, ref count, ref capacity);
+
+ static void ExpandAdd(T item, ref T* data, ref ushort count, ref ushort capacity)
+ {
+ ushort newCount = checked((ushort)(count + 1));
+ ushort newCapacity = (ushort)Math.Min(capacity * 2, ushort.MaxValue);
+
+ var oldSpan = new Span<T>(data, count);
+
+ capacity = newCapacity;
+ data = Allocators.References.Allocate<T>(capacity);
+
+ oldSpan.CopyTo(new Span<T>(data, count));
+
+ data[count] = item;
+ count = newCount;
+ }
+ }
+
+ private static void Add<T>(T item, ref T* data, ref uint count, ref uint capacity) where T : unmanaged
+ {
+ if (count < capacity)
+ {
+ data[count++] = item;
+
+ return;
+ }
+
+            // Could not add the item on the fast path; fall back to the slow path.
+ ExpandAdd(item, ref data, ref count, ref capacity);
+
+ static void ExpandAdd(T item, ref T* data, ref uint count, ref uint capacity)
+ {
+ uint newCount = checked(count + 1);
+ uint newCapacity = (uint)Math.Min(capacity * 2, int.MaxValue);
+
+ if (newCapacity <= capacity)
+ {
+ throw new OverflowException();
+ }
+
+ var oldSpan = new Span<T>(data, (int)count);
+
+ capacity = newCapacity;
+ data = Allocators.References.Allocate<T>(capacity);
+
+ oldSpan.CopyTo(new Span<T>(data, (int)count));
+
+ data[count] = item;
+ count = newCount;
+ }
+ }
+
+ private static void Remove<T>(in T item, ref T* data, ref ushort count) where T : unmanaged
+ {
+ var span = new Span<T>(data, count);
+
+ for (int i = 0; i < span.Length; i++)
+ {
+ if (EqualityComparer<T>.Default.Equals(span[i], item))
+ {
+ if (i + 1 < count)
+ {
+ span.Slice(i + 1).CopyTo(span.Slice(i));
+ }
+
+ count--;
+
+ return;
+ }
+ }
+ }
+
+ private static void Remove<T>(in T item, ref T* data, ref uint count) where T : unmanaged
+ {
+ var span = new Span<T>(data, (int)count);
+
+ for (int i = 0; i < span.Length; i++)
+ {
+ if (EqualityComparer<T>.Default.Equals(span[i], item))
+ {
+ if (i + 1 < count)
+ {
+ span.Slice(i + 1).CopyTo(span.Slice(i));
+ }
+
+ count--;
+
+ return;
+ }
+ }
+ }
+
+ public override int GetHashCode()
+ {
+ return ((ulong)_data).GetHashCode();
+ }
+
+ public bool Equals(Operand operand)
+ {
+ return operand._data == _data;
+ }
+
+ public override bool Equals(object obj)
+ {
+ return obj is Operand operand && Equals(operand);
+ }
+
+ public static bool operator ==(Operand a, Operand b)
+ {
+ return a.Equals(b);
+ }
+
+ public static bool operator !=(Operand a, Operand b)
+ {
+ return !a.Equals(b);
+ }
+
+ public static class Factory
+ {
+ private const int InternTableSize = 256;
+ private const int InternTableProbeLength = 8;
+
+ [ThreadStatic]
+ private static Data* _internTable;
+
+ private static Data* InternTable
+ {
+ get
+ {
+ if (_internTable == null)
+ {
+ _internTable = (Data*)NativeAllocator.Instance.Allocate((uint)sizeof(Data) * InternTableSize);
+
+ // Make sure the table is zeroed.
+ new Span<Data>(_internTable, InternTableSize).Clear();
+ }
+
+ return _internTable;
+ }
+ }
+
+ private static Operand Make(OperandKind kind, OperandType type, ulong value, Symbol symbol = default)
+ {
+ Debug.Assert(kind != OperandKind.None);
+
+ Data* data = null;
+
+                // If the operand is a constant or register, try to look it up in the intern table before allocating.
+ if (kind == OperandKind.Constant || kind == OperandKind.Register)
+ {
+ uint hash = (uint)HashCode.Combine(kind, type, value);
+
+ // Look in the next InternTableProbeLength slots for a match.
+ for (uint i = 0; i < InternTableProbeLength; i++)
+ {
+ Operand interned = new();
+ interned._data = &InternTable[(hash + i) % InternTableSize];
+
+ // If slot matches the allocation request then return that slot.
+ if (interned.Kind == kind && interned.Type == type && interned.Value == value && interned.Symbol == symbol)
+ {
+ return interned;
+ }
+                        // Otherwise, if the slot is not occupied, store the new operand in that slot.
+ else if (interned.Kind == OperandKind.None)
+ {
+ data = interned._data;
+
+ break;
+ }
+ }
+ }
+
+                // If we could not get a slot from the intern table, allocate elsewhere and store the data there.
+ if (data == null)
+ {
+ data = Allocators.Operands.Allocate<Data>();
+ }
+
+ *data = default;
+
+ Operand result = new();
+ result._data = data;
+ result.Value = value;
+ result.Kind = kind;
+ result.Type = type;
+
+ if (kind != OperandKind.Memory)
+ {
+ result.Symbol = symbol;
+ }
+
+                // If local variable, then the use and def lists are initialized with default sizes.
+ if (kind == OperandKind.LocalVariable)
+ {
+ New(ref result._data->Assignments, ref result._data->AssignmentsCount, ref result._data->AssignmentsCapacity, 1);
+ New(ref result._data->Uses, ref result._data->UsesCount, ref result._data->UsesCapacity, 4);
+ }
+
+ return result;
+ }
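+
+            // Interning makes equal constants reference-equal on the same thread, e.g.
+            // (sketch): Operand a = Const(0ul), b = Const(0ul); then a == b is normally
+            // true because both resolve to the same intern-table slot. If all probed
+            // slots are taken the operand is allocated elsewhere and equality may fail.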
+
+ public static Operand Const(OperandType type, long value)
+ {
+ Debug.Assert(type is OperandType.I32 or OperandType.I64);
+
+ return type == OperandType.I32 ? Const((int)value) : Const(value);
+ }
+
+ public static Operand Const(bool value)
+ {
+ return Const(value ? 1 : 0);
+ }
+
+ public static Operand Const(int value)
+ {
+ return Const((uint)value);
+ }
+
+ public static Operand Const(uint value)
+ {
+ return Make(OperandKind.Constant, OperandType.I32, value);
+ }
+
+ public static Operand Const(long value)
+ {
+ return Const(value, symbol: default);
+ }
+
+ public static Operand Const<T>(ref T reference, Symbol symbol = default)
+ {
+ return Const((long)Unsafe.AsPointer(ref reference), symbol);
+ }
+
+ public static Operand Const(long value, Symbol symbol)
+ {
+ return Make(OperandKind.Constant, OperandType.I64, (ulong)value, symbol);
+ }
+
+ public static Operand Const(ulong value)
+ {
+ return Make(OperandKind.Constant, OperandType.I64, value);
+ }
+
+ public static Operand ConstF(float value)
+ {
+ return Make(OperandKind.Constant, OperandType.FP32, (ulong)BitConverter.SingleToInt32Bits(value));
+ }
+
+ public static Operand ConstF(double value)
+ {
+ return Make(OperandKind.Constant, OperandType.FP64, (ulong)BitConverter.DoubleToInt64Bits(value));
+ }
+
+ public static Operand Label()
+ {
+ return Make(OperandKind.Label, OperandType.None, 0);
+ }
+
+ public static Operand Local(OperandType type)
+ {
+ return Make(OperandKind.LocalVariable, type, 0);
+ }
+
+ public static Operand Register(int index, RegisterType regType, OperandType type)
+ {
+ return Make(OperandKind.Register, type, (ulong)((int)regType << 24 | index));
+ }
+
+ public static Operand Undef()
+ {
+ return Make(OperandKind.Undefined, OperandType.None, 0);
+ }
+
+ public static Operand MemoryOp(
+ OperandType type,
+ Operand baseAddress,
+ Operand index = default,
+ Multiplier scale = Multiplier.x1,
+ int displacement = 0)
+ {
+ Operand result = Make(OperandKind.Memory, type, 0);
+
+ MemoryOperand memory = result.GetMemory();
+ memory.BaseAddress = baseAddress;
+ memory.Index = index;
+ memory.Scale = scale;
+ memory.Displacement = displacement;
+
+ return result;
+ }
+ }
+ }
+} \ No newline at end of file
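
The Factory.Make path above interns constants and registers in a small, thread-local, linearly probed table. A minimal sketch of the same probing scheme over a managed array, assuming illustrative names that are not part of the real code:

    using System;

    // Minimal sketch of the linear-probing intern scheme, over a managed
    // array instead of native memory. All names here are illustrative.
    class InternSketch
    {
        private const int TableSize = 256;
        private const int ProbeLength = 8;

        private readonly (bool Occupied, ulong Key, string Value)[] _slots =
            new (bool, ulong, string)[TableSize];

        public string GetOrAdd(ulong key, Func<string> create)
        {
            uint hash = (uint)HashCode.Combine(key);

            // Probe the next ProbeLength slots for a match or a free slot.
            for (uint i = 0; i < ProbeLength; i++)
            {
                ref var slot = ref _slots[(hash + i) % TableSize];

                if (slot.Occupied && slot.Key == key)
                {
                    return slot.Value; // Hit: reuse the interned instance.
                }

                if (!slot.Occupied)
                {
                    slot = (true, key, create()); // Claim the empty slot.
                    return slot.Value;
                }
            }

            // Every probed slot holds a different key: allocate without interning.
            return create();
        }
    }
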
diff --git a/src/ARMeilleure/IntermediateRepresentation/OperandKind.cs b/src/ARMeilleure/IntermediateRepresentation/OperandKind.cs
new file mode 100644
index 00000000..adb83561
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/OperandKind.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ enum OperandKind
+ {
+ None,
+ Constant,
+ Label,
+ LocalVariable,
+ Memory,
+ Register,
+ Undefined
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/IntermediateRepresentation/OperandType.cs b/src/ARMeilleure/IntermediateRepresentation/OperandType.cs
new file mode 100644
index 00000000..81b22cf5
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/OperandType.cs
@@ -0,0 +1,65 @@
+using System;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ enum OperandType
+ {
+ None,
+ I32,
+ I64,
+ FP32,
+ FP64,
+ V128
+ }
+
+ static class OperandTypeExtensions
+ {
+ public static bool IsInteger(this OperandType type)
+ {
+ return type == OperandType.I32 ||
+ type == OperandType.I64;
+ }
+
+ public static RegisterType ToRegisterType(this OperandType type)
+ {
+ switch (type)
+ {
+ case OperandType.FP32: return RegisterType.Vector;
+ case OperandType.FP64: return RegisterType.Vector;
+ case OperandType.I32: return RegisterType.Integer;
+ case OperandType.I64: return RegisterType.Integer;
+ case OperandType.V128: return RegisterType.Vector;
+ }
+
+ throw new InvalidOperationException($"Invalid operand type \"{type}\".");
+ }
+
+ public static int GetSizeInBytes(this OperandType type)
+ {
+ switch (type)
+ {
+ case OperandType.FP32: return 4;
+ case OperandType.FP64: return 8;
+ case OperandType.I32: return 4;
+ case OperandType.I64: return 8;
+ case OperandType.V128: return 16;
+ }
+
+ throw new InvalidOperationException($"Invalid operand type \"{type}\".");
+ }
+
+ public static int GetSizeInBytesLog2(this OperandType type)
+ {
+ switch (type)
+ {
+ case OperandType.FP32: return 2;
+ case OperandType.FP64: return 3;
+ case OperandType.I32: return 2;
+ case OperandType.I64: return 3;
+ case OperandType.V128: return 4;
+ }
+
+ throw new InvalidOperationException($"Invalid operand type \"{type}\".");
+ }
+ }
+} \ No newline at end of file
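
GetSizeInBytes and GetSizeInBytesLog2 above encode the same information twice; the log2 variant exists so callers can shift instead of multiply. A quick illustrative check of the invariant that relates them:

    // Illustrative check: for every sized type, size == 1 << log2(size).
    foreach (OperandType type in new[]
    {
        OperandType.I32, OperandType.I64,
        OperandType.FP32, OperandType.FP64, OperandType.V128,
    })
    {
        System.Diagnostics.Debug.Assert(
            type.GetSizeInBytes() == 1 << type.GetSizeInBytesLog2());
    }
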
diff --git a/src/ARMeilleure/IntermediateRepresentation/Operation.cs b/src/ARMeilleure/IntermediateRepresentation/Operation.cs
new file mode 100644
index 00000000..c71e143c
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/Operation.cs
@@ -0,0 +1,376 @@
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ unsafe struct Operation : IEquatable<Operation>, IIntrusiveListNode<Operation>
+ {
+ internal struct Data
+ {
+ public ushort Instruction;
+ public ushort Intrinsic;
+ public ushort SourcesCount;
+ public ushort DestinationsCount;
+ public Operation ListPrevious;
+ public Operation ListNext;
+ public Operand* Destinations;
+ public Operand* Sources;
+ }
+
+ private Data* _data;
+
+ public Instruction Instruction
+ {
+ get => (Instruction)_data->Instruction;
+ private set => _data->Instruction = (ushort)value;
+ }
+
+ public Intrinsic Intrinsic
+ {
+ get => (Intrinsic)_data->Intrinsic;
+ private set => _data->Intrinsic = (ushort)value;
+ }
+
+ public Operation ListPrevious
+ {
+ get => _data->ListPrevious;
+ set => _data->ListPrevious = value;
+ }
+
+ public Operation ListNext
+ {
+ get => _data->ListNext;
+ set => _data->ListNext = value;
+ }
+
+ public Operand Destination
+ {
+ get => _data->DestinationsCount != 0 ? GetDestination(0) : default;
+ set => SetDestination(value);
+ }
+
+ public int DestinationsCount => _data->DestinationsCount;
+ public int SourcesCount => _data->SourcesCount;
+
+ internal Span<Operand> DestinationsUnsafe => new(_data->Destinations, _data->DestinationsCount);
+ internal Span<Operand> SourcesUnsafe => new(_data->Sources, _data->SourcesCount);
+
+ public PhiOperation AsPhi()
+ {
+ Debug.Assert(Instruction == Instruction.Phi);
+
+ return new PhiOperation(this);
+ }
+
+ public Operand GetDestination(int index)
+ {
+ return DestinationsUnsafe[index];
+ }
+
+ public Operand GetSource(int index)
+ {
+ return SourcesUnsafe[index];
+ }
+
+ public void SetDestination(int index, Operand dest)
+ {
+ ref Operand curDest = ref DestinationsUnsafe[index];
+
+ RemoveAssignment(curDest);
+ AddAssignment(dest);
+
+ curDest = dest;
+ }
+
+ public void SetSource(int index, Operand src)
+ {
+ ref Operand curSrc = ref SourcesUnsafe[index];
+
+ RemoveUse(curSrc);
+ AddUse(src);
+
+ curSrc = src;
+ }
+
+ private void RemoveOldDestinations()
+ {
+ for (int i = 0; i < _data->DestinationsCount; i++)
+ {
+ RemoveAssignment(_data->Destinations[i]);
+ }
+ }
+
+ public void SetDestination(Operand dest)
+ {
+ RemoveOldDestinations();
+
+ if (dest == default)
+ {
+ _data->DestinationsCount = 0;
+ }
+ else
+ {
+ EnsureCapacity(ref _data->Destinations, ref _data->DestinationsCount, 1);
+
+ _data->Destinations[0] = dest;
+
+ AddAssignment(dest);
+ }
+ }
+
+ public void SetDestinations(Operand[] dests)
+ {
+ RemoveOldDestinations();
+
+ EnsureCapacity(ref _data->Destinations, ref _data->DestinationsCount, dests.Length);
+
+ for (int index = 0; index < dests.Length; index++)
+ {
+ Operand newOp = dests[index];
+
+ _data->Destinations[index] = newOp;
+
+ AddAssignment(newOp);
+ }
+ }
+
+ private void RemoveOldSources()
+ {
+ for (int index = 0; index < _data->SourcesCount; index++)
+ {
+ RemoveUse(_data->Sources[index]);
+ }
+ }
+
+ public void SetSource(Operand src)
+ {
+ RemoveOldSources();
+
+ if (src == default)
+ {
+ _data->SourcesCount = 0;
+ }
+ else
+ {
+ EnsureCapacity(ref _data->Sources, ref _data->SourcesCount, 1);
+
+ _data->Sources[0] = src;
+
+ AddUse(src);
+ }
+ }
+
+ public void SetSources(Operand[] srcs)
+ {
+ RemoveOldSources();
+
+ EnsureCapacity(ref _data->Sources, ref _data->SourcesCount, srcs.Length);
+
+ for (int index = 0; index < srcs.Length; index++)
+ {
+ Operand newOp = srcs[index];
+
+ _data->Sources[index] = newOp;
+
+ AddUse(newOp);
+ }
+ }
+
+ public void TurnIntoCopy(Operand source)
+ {
+ Instruction = Instruction.Copy;
+
+ SetSource(source);
+ }
+
+ private void AddAssignment(Operand op)
+ {
+ if (op != default)
+ {
+ op.AddAssignment(this);
+ }
+ }
+
+ private void RemoveAssignment(Operand op)
+ {
+ if (op != default)
+ {
+ op.RemoveAssignment(this);
+ }
+ }
+
+ private void AddUse(Operand op)
+ {
+ if (op != default)
+ {
+ op.AddUse(this);
+ }
+ }
+
+ private void RemoveUse(Operand op)
+ {
+ if (op != default)
+ {
+ op.RemoveUse(this);
+ }
+ }
+
+ public bool Equals(Operation operation)
+ {
+ return operation._data == _data;
+ }
+
+ public override bool Equals(object obj)
+ {
+ return obj is Operation operation && Equals(operation);
+ }
+
+ public override int GetHashCode()
+ {
+ return HashCode.Combine((IntPtr)_data);
+ }
+
+ public static bool operator ==(Operation a, Operation b)
+ {
+ return a.Equals(b);
+ }
+
+ public static bool operator !=(Operation a, Operation b)
+ {
+ return !a.Equals(b);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void EnsureCapacity(ref Operand* list, ref ushort capacity, int newCapacity)
+ {
+ if (newCapacity > ushort.MaxValue)
+ {
+ ThrowOverflow(newCapacity);
+ }
+ // We only need to allocate a new buffer if we're increasing the size.
+ else if (newCapacity > capacity)
+ {
+ list = Allocators.References.Allocate<Operand>((uint)newCapacity);
+ }
+
+ capacity = (ushort)newCapacity;
+ }
+
+ private static void ThrowOverflow(int count) =>
+ throw new OverflowException($"Exceeded maximum size for Sources or Destinations. Required {count}.");
+
+ public static class Factory
+ {
+ private static Operation Make(Instruction inst, int destCount, int srcCount)
+ {
+ Data* data = Allocators.Operations.Allocate<Data>();
+ *data = default;
+
+ Operation result = new();
+ result._data = data;
+ result.Instruction = inst;
+
+ EnsureCapacity(ref result._data->Destinations, ref result._data->DestinationsCount, destCount);
+ EnsureCapacity(ref result._data->Sources, ref result._data->SourcesCount, srcCount);
+
+ result.DestinationsUnsafe.Clear();
+ result.SourcesUnsafe.Clear();
+
+ return result;
+ }
+
+ public static Operation Operation(Instruction inst, Operand dest)
+ {
+ Operation result = Make(inst, 0, 0);
+ result.SetDestination(dest);
+ return result;
+ }
+
+ public static Operation Operation(Instruction inst, Operand dest, Operand src0)
+ {
+ Operation result = Make(inst, 0, 1);
+ result.SetDestination(dest);
+ result.SetSource(0, src0);
+ return result;
+ }
+
+ public static Operation Operation(Instruction inst, Operand dest, Operand src0, Operand src1)
+ {
+ Operation result = Make(inst, 0, 2);
+ result.SetDestination(dest);
+ result.SetSource(0, src0);
+ result.SetSource(1, src1);
+ return result;
+ }
+
+ public static Operation Operation(Instruction inst, Operand dest, Operand src0, Operand src1, Operand src2)
+ {
+ Operation result = Make(inst, 0, 3);
+ result.SetDestination(dest);
+ result.SetSource(0, src0);
+ result.SetSource(1, src1);
+ result.SetSource(2, src2);
+ return result;
+ }
+
+ public static Operation Operation(Instruction inst, Operand dest, int srcCount)
+ {
+ Operation result = Make(inst, 0, srcCount);
+ result.SetDestination(dest);
+ return result;
+ }
+
+ public static Operation Operation(Instruction inst, Operand dest, Operand[] srcs)
+ {
+ Operation result = Make(inst, 0, srcs.Length);
+
+ result.SetDestination(dest);
+
+ for (int index = 0; index < srcs.Length; index++)
+ {
+ result.SetSource(index, srcs[index]);
+ }
+
+ return result;
+ }
+
+ public static Operation Operation(Intrinsic intrin, Operand dest, params Operand[] srcs)
+ {
+ Operation result = Make(Instruction.Extended, 0, srcs.Length);
+
+ result.Intrinsic = intrin;
+ result.SetDestination(dest);
+
+ for (int index = 0; index < srcs.Length; index++)
+ {
+ result.SetSource(index, srcs[index]);
+ }
+
+ return result;
+ }
+
+ public static Operation Operation(Instruction inst, Operand[] dests, Operand[] srcs)
+ {
+ Operation result = Make(inst, dests.Length, srcs.Length);
+
+ for (int index = 0; index < dests.Length; index++)
+ {
+ result.SetDestination(index, dests[index]);
+ }
+
+ for (int index = 0; index < srcs.Length; index++)
+ {
+ result.SetSource(index, srcs[index]);
+ }
+
+ return result;
+ }
+
+ public static Operation PhiOperation(Operand dest, int srcCount)
+ {
+ return Operation(Instruction.Phi, dest, srcCount * 2);
+ }
+ }
+ }
+} \ No newline at end of file
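
SetSource and SetDestination above keep def-use information consistent by unregistering the operation from the old operand before registering it with the new one. A sketch of that invariant with illustrative managed types (the real lists live in native memory):

    using System.Collections.Generic;

    // Sketch of the def-use bookkeeping performed by SetSource above.
    class ValueSketch
    {
        public List<OpSketch> Uses { get; } = new();
    }

    class OpSketch
    {
        private readonly ValueSketch[] _sources;

        public OpSketch(int sourceCount) => _sources = new ValueSketch[sourceCount];

        public void SetSource(int index, ValueSketch src)
        {
            // Unregister this operation from the old source's user list...
            _sources[index]?.Uses.Remove(this);

            // ...then register it with the new one, keeping def-use info exact.
            src?.Uses.Add(this);

            _sources[index] = src;
        }
    }
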
diff --git a/src/ARMeilleure/IntermediateRepresentation/PhiOperation.cs b/src/ARMeilleure/IntermediateRepresentation/PhiOperation.cs
new file mode 100644
index 00000000..d2a3cf21
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/PhiOperation.cs
@@ -0,0 +1,37 @@
+using ARMeilleure.Translation;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ readonly struct PhiOperation
+ {
+ private readonly Operation _operation;
+
+ public PhiOperation(Operation operation)
+ {
+ _operation = operation;
+ }
+
+ public int SourcesCount => _operation.SourcesCount / 2;
+
+ public BasicBlock GetBlock(ControlFlowGraph cfg, int index)
+ {
+ return cfg.PostOrderBlocks[cfg.PostOrderMap[_operation.GetSource(index * 2).AsInt32()]];
+ }
+
+ public void SetBlock(int index, BasicBlock block)
+ {
+ _operation.SetSource(index * 2, Const(block.Index));
+ }
+
+ public Operand GetSource(int index)
+ {
+ return _operation.GetSource(index * 2 + 1);
+ }
+
+ public void SetSource(int index, Operand operand)
+ {
+ _operation.SetSource(index * 2 + 1, operand);
+ }
+ }
+}
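
PhiOperation is a thin view over a regular Operation whose sources are stored as interleaved (block index, value) pairs, which is why Factory.PhiOperation allocates srcCount * 2 slots. The layout, sketched with illustrative helpers:

    // Layout sketch for a phi with N predecessors:
    // sources = [ block0, value0, block1, value1, ..., blockN-1, valueN-1 ]
    static int PairCount(int rawSourcesCount) => rawSourcesCount / 2;
    static int BlockSlot(int pair) => pair * 2;     // Holds Const(block.Index).
    static int ValueSlot(int pair) => pair * 2 + 1; // Holds the incoming value.
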
diff --git a/src/ARMeilleure/IntermediateRepresentation/Register.cs b/src/ARMeilleure/IntermediateRepresentation/Register.cs
new file mode 100644
index 00000000..241e4d13
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/Register.cs
@@ -0,0 +1,43 @@
+using System;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ readonly struct Register : IEquatable<Register>
+ {
+ public int Index { get; }
+
+ public RegisterType Type { get; }
+
+ public Register(int index, RegisterType type)
+ {
+ Index = index;
+ Type = type;
+ }
+
+ public override int GetHashCode()
+ {
+ return (ushort)Index | ((int)Type << 16);
+ }
+
+ public static bool operator ==(Register x, Register y)
+ {
+ return x.Equals(y);
+ }
+
+ public static bool operator !=(Register x, Register y)
+ {
+ return !x.Equals(y);
+ }
+
+ public override bool Equals(object obj)
+ {
+ return obj is Register reg && Equals(reg);
+ }
+
+ public bool Equals(Register other)
+ {
+ return other.Index == Index &&
+ other.Type == Type;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/IntermediateRepresentation/RegisterType.cs b/src/ARMeilleure/IntermediateRepresentation/RegisterType.cs
new file mode 100644
index 00000000..88ac6c12
--- /dev/null
+++ b/src/ARMeilleure/IntermediateRepresentation/RegisterType.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ enum RegisterType
+ {
+ Integer,
+ Vector,
+ Flag,
+ FpFlag
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Memory/IJitMemoryAllocator.cs b/src/ARMeilleure/Memory/IJitMemoryAllocator.cs
new file mode 100644
index 00000000..19b696b0
--- /dev/null
+++ b/src/ARMeilleure/Memory/IJitMemoryAllocator.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Memory
+{
+ public interface IJitMemoryAllocator
+ {
+ IJitMemoryBlock Allocate(ulong size);
+ IJitMemoryBlock Reserve(ulong size);
+
+ ulong GetPageSize();
+ }
+}
diff --git a/src/ARMeilleure/Memory/IJitMemoryBlock.cs b/src/ARMeilleure/Memory/IJitMemoryBlock.cs
new file mode 100644
index 00000000..670f2862
--- /dev/null
+++ b/src/ARMeilleure/Memory/IJitMemoryBlock.cs
@@ -0,0 +1,14 @@
+using System;
+
+namespace ARMeilleure.Memory
+{
+ public interface IJitMemoryBlock : IDisposable
+ {
+ IntPtr Pointer { get; }
+
+ bool Commit(ulong offset, ulong size);
+
+ void MapAsRx(ulong offset, ulong size);
+ void MapAsRwx(ulong offset, ulong size);
+ }
+}
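
A hypothetical usage sketch of the two interfaces above, following the usual W^X flow (reserve address space, commit pages, write while writable, then remap read-execute before running the code); `allocator` stands in for any IJitMemoryAllocator implementation:

    // Hypothetical W^X flow over IJitMemoryAllocator/IJitMemoryBlock.
    IJitMemoryBlock block = allocator.Reserve(1UL << 20); // Reserve 1 MiB.
    ulong pageSize = allocator.GetPageSize();

    block.Commit(0, pageSize);   // Back the first page with real memory.
    block.MapAsRwx(0, pageSize); // Writable (and executable) while emitting code.
    // ... copy generated machine code to block.Pointer ...
    block.MapAsRx(0, pageSize);  // Read/execute only before running it.
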
diff --git a/src/ARMeilleure/Memory/IMemoryManager.cs b/src/ARMeilleure/Memory/IMemoryManager.cs
new file mode 100644
index 00000000..5eb1fadd
--- /dev/null
+++ b/src/ARMeilleure/Memory/IMemoryManager.cs
@@ -0,0 +1,77 @@
+using System;
+
+namespace ARMeilleure.Memory
+{
+ public interface IMemoryManager
+ {
+ int AddressSpaceBits { get; }
+
+ IntPtr PageTablePointer { get; }
+
+ MemoryManagerType Type { get; }
+
+ event Action<ulong, ulong> UnmapEvent;
+
+ /// <summary>
+ /// Reads data from CPU mapped memory.
+ /// </summary>
+ /// <typeparam name="T">Type of the data being read</typeparam>
+ /// <param name="va">Virtual address of the data in memory</param>
+ /// <returns>The data</returns>
+ T Read<T>(ulong va) where T : unmanaged;
+
+ /// <summary>
+ /// Reads data from CPU mapped memory, with read tracking.
+ /// </summary>
+ /// <typeparam name="T">Type of the data being read</typeparam>
+ /// <param name="va">Virtual address of the data in memory</param>
+ /// <returns>The data</returns>
+ T ReadTracked<T>(ulong va) where T : unmanaged;
+
+ /// <summary>
+ /// Writes data to CPU mapped memory.
+ /// </summary>
+ /// <typeparam name="T">Type of the data being written</typeparam>
+ /// <param name="va">Virtual address to write the data into</param>
+ /// <param name="value">Data to be written</param>
+ void Write<T>(ulong va, T value) where T : unmanaged;
+
+ /// <summary>
+ /// Gets a read-only span of data from CPU mapped memory.
+ /// </summary>
+ /// <param name="va">Virtual address of the data</param>
+ /// <param name="size">Size of the data</param>
+ /// <param name="tracked">True if read tracking is triggered on the span</param>
+ /// <returns>A read-only span of the data</returns>
+ ReadOnlySpan<byte> GetSpan(ulong va, int size, bool tracked = false);
+
+ /// <summary>
+ /// Gets a reference for the given type at the specified virtual memory address.
+ /// </summary>
+ /// <remarks>
+ /// The data must be located in a contiguous memory region.
+ /// </remarks>
+ /// <typeparam name="T">Type of the data to get the reference</typeparam>
+ /// <param name="va">Virtual address of the data</param>
+ /// <returns>A reference to the data in memory</returns>
+ ref T GetRef<T>(ulong va) where T : unmanaged;
+
+ /// <summary>
+ /// Checks if the page at a given CPU virtual address is mapped.
+ /// </summary>
+ /// <param name="va">Virtual address to check</param>
+ /// <returns>True if the address is mapped, false otherwise</returns>
+ bool IsMapped(ulong va);
+
+ /// <summary>
+ /// Alerts the memory tracking that a given region has been read from or written to.
+ /// This should be called before read/write is performed.
+ /// </summary>
+ /// <param name="va">Virtual address of the region</param>
+ /// <param name="size">Size of the region</param>
+ /// <param name="write">True if the region was written, false if read</param>
+ /// <param name="precise">True if the access is precise, false otherwise</param>
+ /// <param name="exemptId">Optional ID of the handles that should not be signalled</param>
+ void SignalMemoryTracking(ulong va, ulong size, bool write, bool precise = false, int? exemptId = null);
+ }
+} \ No newline at end of file
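
The SignalMemoryTracking contract above ("should be called before read/write is performed") implies call sites shaped like the following sketch, where `memoryManager`, `va`, and the unmanaged type `MyStruct` are hypothetical:

    // Illustrative call site: announce the access, then touch memory directly.
    memoryManager.SignalMemoryTracking(va, (ulong)Unsafe.SizeOf<MyStruct>(), write: true);
    ref MyStruct data = ref memoryManager.GetRef<MyStruct>(va);
    data.Field = 42; // The write was signalled to the tracking layer above.
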
diff --git a/src/ARMeilleure/Memory/InvalidAccessException.cs b/src/ARMeilleure/Memory/InvalidAccessException.cs
new file mode 100644
index 00000000..ad540719
--- /dev/null
+++ b/src/ARMeilleure/Memory/InvalidAccessException.cs
@@ -0,0 +1,23 @@
+using System;
+
+namespace ARMeilleure.Memory
+{
+ class InvalidAccessException : Exception
+ {
+ public InvalidAccessException()
+ {
+ }
+
+ public InvalidAccessException(ulong address) : base($"Invalid memory access at virtual address 0x{address:X16}.")
+ {
+ }
+
+ public InvalidAccessException(string message) : base(message)
+ {
+ }
+
+ public InvalidAccessException(string message, Exception innerException) : base(message, innerException)
+ {
+ }
+ }
+}
diff --git a/src/ARMeilleure/Memory/MemoryManagerType.cs b/src/ARMeilleure/Memory/MemoryManagerType.cs
new file mode 100644
index 00000000..ce84ccaf
--- /dev/null
+++ b/src/ARMeilleure/Memory/MemoryManagerType.cs
@@ -0,0 +1,41 @@
+namespace ARMeilleure.Memory
+{
+ /// <summary>
+ /// Indicates the type of a memory manager and the method it uses for memory mapping
+ /// and address translation. This controls the code generated for memory accesses by the JIT.
+ /// </summary>
+ public enum MemoryManagerType
+ {
+ /// <summary>
+ /// Complete software MMU implementation; the read/write methods are always called,
+ /// with no attempt at faster memory access.
+ /// </summary>
+ SoftwareMmu,
+
+ /// <summary>
+ /// High-level implementation using a software flat page table for address translation,
+ /// used to speed up address translation if possible without calling the read/write methods.
+ /// </summary>
+ SoftwarePageTable,
+
+ /// <summary>
+ /// High-level implementation with mappings managed by the host OS, effectively using hardware
+ /// page tables. No address translation is performed in software and the memory is just accessed directly.
+ /// </summary>
+ HostMapped,
+
+ /// <summary>
+ /// Same as the host mapped memory manager type, but without masking the address within the address space.
+ /// This allows invalid accesses from JIT code to reach the rest of the program, but is faster.
+ /// </summary>
+ HostMappedUnsafe
+ }
+
+ static class MemoryManagerTypeExtensions
+ {
+ public static bool IsHostMapped(this MemoryManagerType type)
+ {
+ return type == MemoryManagerType.HostMapped || type == MemoryManagerType.HostMappedUnsafe;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Memory/ReservedRegion.cs b/src/ARMeilleure/Memory/ReservedRegion.cs
new file mode 100644
index 00000000..2197afad
--- /dev/null
+++ b/src/ARMeilleure/Memory/ReservedRegion.cs
@@ -0,0 +1,58 @@
+using System;
+
+namespace ARMeilleure.Memory
+{
+ class ReservedRegion
+ {
+ public const int DefaultGranularity = 65536; // Mapping granularity on Windows.
+
+ public IJitMemoryBlock Block { get; }
+
+ public IntPtr Pointer => Block.Pointer;
+
+ private readonly ulong _maxSize;
+ private readonly ulong _sizeGranularity;
+ private ulong _currentSize;
+
+ public ReservedRegion(IJitMemoryAllocator allocator, ulong maxSize, ulong granularity = 0)
+ {
+ if (granularity == 0)
+ {
+ granularity = DefaultGranularity;
+ }
+
+ Block = allocator.Reserve(maxSize);
+ _maxSize = maxSize;
+ _sizeGranularity = granularity;
+ _currentSize = 0;
+ }
+
+ public void ExpandIfNeeded(ulong desiredSize)
+ {
+ if (desiredSize > _maxSize)
+ {
+ throw new OutOfMemoryException();
+ }
+
+ if (desiredSize > _currentSize)
+ {
+ // Lock, and then check again. We only want to commit once.
+ lock (this)
+ {
+ if (desiredSize >= _currentSize)
+ {
+ ulong overflowBytes = desiredSize - _currentSize;
+ ulong moreToCommit = (((_sizeGranularity - 1) + overflowBytes) / _sizeGranularity) * _sizeGranularity; // Round up.
+ Block.Commit(_currentSize, moreToCommit);
+ _currentSize += moreToCommit;
+ }
+ }
+ }
+ }
+
+ public void Dispose()
+ {
+ Block.Dispose();
+ }
+ }
+}
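
The commit size in ExpandIfNeeded is rounded up to the granularity with plain integer arithmetic; a worked example under the default 64 KiB granularity:

    // Worked example of the round-up in ExpandIfNeeded.
    ulong granularity = 65536; // DefaultGranularity.
    ulong overflowBytes = 1;   // Needing even one extra byte...
    ulong moreToCommit = (((granularity - 1) + overflowBytes) / granularity) * granularity;
    // ...commits a whole 64 KiB chunk: moreToCommit == 65536.
    // overflowBytes = 65537 would give moreToCommit == 131072 (two chunks).
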
diff --git a/src/ARMeilleure/Native/JitSupportDarwin.cs b/src/ARMeilleure/Native/JitSupportDarwin.cs
new file mode 100644
index 00000000..7d6a8634
--- /dev/null
+++ b/src/ARMeilleure/Native/JitSupportDarwin.cs
@@ -0,0 +1,13 @@
+using System;
+using System.Runtime.InteropServices;
+using System.Runtime.Versioning;
+
+namespace ARMeilleure.Native
+{
+ [SupportedOSPlatform("macos")]
+ public static partial class JitSupportDarwin
+ {
+ [LibraryImport("libarmeilleure-jitsupport", EntryPoint = "armeilleure_jit_memcpy")]
+ public static partial void Copy(IntPtr dst, IntPtr src, ulong n);
+ }
+}
diff --git a/src/ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib b/src/ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib
new file mode 100644
index 00000000..c65b0a4e
--- /dev/null
+++ b/src/ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib
Binary files differ
diff --git a/src/ARMeilleure/Native/macos_jit_support/Makefile b/src/ARMeilleure/Native/macos_jit_support/Makefile
new file mode 100644
index 00000000..d6da35d5
--- /dev/null
+++ b/src/ARMeilleure/Native/macos_jit_support/Makefile
@@ -0,0 +1,8 @@
+NAME = libarmeilleure-jitsupport.dylib
+
+all: ${NAME}
+
+${NAME}:
+ clang -O3 -dynamiclib support.c -o ${NAME}
+clean:
+ rm -f ${NAME}
diff --git a/src/ARMeilleure/Native/macos_jit_support/support.c b/src/ARMeilleure/Native/macos_jit_support/support.c
new file mode 100644
index 00000000..1b13d906
--- /dev/null
+++ b/src/ARMeilleure/Native/macos_jit_support/support.c
@@ -0,0 +1,14 @@
+#include <stddef.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <libkern/OSCacheControl.h>
+
+void armeilleure_jit_memcpy(void *dst, const void *src, size_t n) {
+ pthread_jit_write_protect_np(0);
+ memcpy(dst, src, n);
+ pthread_jit_write_protect_np(1);
+
+ // Ensure that the instruction cache for this range is invalidated.
+ sys_icache_invalidate(dst, n);
+}
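
On Apple silicon, MAP_JIT memory cannot be written while the thread's JIT write protection is enabled, so copies into the JIT cache are routed through the native helper above, which toggles protection around memcpy and then invalidates the instruction cache. A hypothetical call site, where `dst`/`src` are IntPtr and `length` is a byte count:

    // Hypothetical call site for JitSupportDarwin.Copy.
    if (OperatingSystem.IsMacOS())
    {
        JitSupportDarwin.Copy(dst, src, (ulong)length); // W^X toggle + icache flush.
    }
    else
    {
        unsafe
        {
            Buffer.MemoryCopy((void*)src, (void*)dst, length, length);
        }
    }
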
diff --git a/src/ARMeilleure/Optimizations.cs b/src/ARMeilleure/Optimizations.cs
new file mode 100644
index 00000000..a84a4dc4
--- /dev/null
+++ b/src/ARMeilleure/Optimizations.cs
@@ -0,0 +1,68 @@
+using System.Runtime.Intrinsics.Arm;
+
+namespace ARMeilleure
+{
+ using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities;
+ using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities;
+
+ public static class Optimizations
+ {
+ public static bool FastFP { get; set; } = true;
+
+ public static bool AllowLcqInFunctionTable { get; set; } = true;
+ public static bool UseUnmanagedDispatchLoop { get; set; } = true;
+
+ public static bool UseAdvSimdIfAvailable { get; set; } = true;
+ public static bool UseArm64PmullIfAvailable { get; set; } = true;
+
+ public static bool UseSseIfAvailable { get; set; } = true;
+ public static bool UseSse2IfAvailable { get; set; } = true;
+ public static bool UseSse3IfAvailable { get; set; } = true;
+ public static bool UseSsse3IfAvailable { get; set; } = true;
+ public static bool UseSse41IfAvailable { get; set; } = true;
+ public static bool UseSse42IfAvailable { get; set; } = true;
+ public static bool UsePopCntIfAvailable { get; set; } = true;
+ public static bool UseAvxIfAvailable { get; set; } = true;
+ public static bool UseAvx512FIfAvailable { get; set; } = true;
+ public static bool UseAvx512VlIfAvailable { get; set; } = true;
+ public static bool UseAvx512BwIfAvailable { get; set; } = true;
+ public static bool UseAvx512DqIfAvailable { get; set; } = true;
+ public static bool UseF16cIfAvailable { get; set; } = true;
+ public static bool UseFmaIfAvailable { get; set; } = true;
+ public static bool UseAesniIfAvailable { get; set; } = true;
+ public static bool UsePclmulqdqIfAvailable { get; set; } = true;
+ public static bool UseShaIfAvailable { get; set; } = true;
+ public static bool UseGfniIfAvailable { get; set; } = true;
+
+ public static bool ForceLegacySse
+ {
+ get => X86HardwareCapabilities.ForceLegacySse;
+ set => X86HardwareCapabilities.ForceLegacySse = value;
+ }
+
+ internal static bool UseAdvSimd => UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd;
+ internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull;
+
+ internal static bool UseSse => UseSseIfAvailable && X86HardwareCapabilities.SupportsSse;
+ internal static bool UseSse2 => UseSse2IfAvailable && X86HardwareCapabilities.SupportsSse2;
+ internal static bool UseSse3 => UseSse3IfAvailable && X86HardwareCapabilities.SupportsSse3;
+ internal static bool UseSsse3 => UseSsse3IfAvailable && X86HardwareCapabilities.SupportsSsse3;
+ internal static bool UseSse41 => UseSse41IfAvailable && X86HardwareCapabilities.SupportsSse41;
+ internal static bool UseSse42 => UseSse42IfAvailable && X86HardwareCapabilities.SupportsSse42;
+ internal static bool UsePopCnt => UsePopCntIfAvailable && X86HardwareCapabilities.SupportsPopcnt;
+ internal static bool UseAvx => UseAvxIfAvailable && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse;
+ internal static bool UseAvx512F => UseAvx512FIfAvailable && X86HardwareCapabilities.SupportsAvx512F && !ForceLegacySse;
+ internal static bool UseAvx512Vl => UseAvx512VlIfAvailable && X86HardwareCapabilities.SupportsAvx512Vl && !ForceLegacySse;
+ internal static bool UseAvx512Bw => UseAvx512BwIfAvailable && X86HardwareCapabilities.SupportsAvx512Bw && !ForceLegacySse;
+ internal static bool UseAvx512Dq => UseAvx512DqIfAvailable && X86HardwareCapabilities.SupportsAvx512Dq && !ForceLegacySse;
+ internal static bool UseF16c => UseF16cIfAvailable && X86HardwareCapabilities.SupportsF16c;
+ internal static bool UseFma => UseFmaIfAvailable && X86HardwareCapabilities.SupportsFma;
+ internal static bool UseAesni => UseAesniIfAvailable && X86HardwareCapabilities.SupportsAesni;
+ internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq;
+ internal static bool UseSha => UseShaIfAvailable && X86HardwareCapabilities.SupportsSha;
+ internal static bool UseGfni => UseGfniIfAvailable && X86HardwareCapabilities.SupportsGfni;
+
+ internal static bool UseAvx512Ortho => UseAvx512F && UseAvx512Vl;
+ internal static bool UseAvx512OrthoFloat => UseAvx512Ortho && UseAvx512Dq;
+ }
+}
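
Each internal gate above ANDs a public "...IfAvailable" switch with detected hardware support (and, for the VEX-encoded extensions, the ForceLegacySse override), so turning a feature off is always honored while turning it on is only a request. A usage sketch:

    // Force SSE code paths even on AVX-capable CPUs (e.g. for debugging):
    Optimizations.UseAvxIfAvailable = false;

    // Harmless on unsupported hardware; UseGfni simply stays false there.
    Optimizations.UseGfniIfAvailable = true;
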
diff --git a/src/ARMeilleure/Signal/NativeSignalHandler.cs b/src/ARMeilleure/Signal/NativeSignalHandler.cs
new file mode 100644
index 00000000..cddeb817
--- /dev/null
+++ b/src/ARMeilleure/Signal/NativeSignalHandler.cs
@@ -0,0 +1,422 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.Translation;
+using ARMeilleure.Translation.Cache;
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Signal
+{
+ [StructLayout(LayoutKind.Sequential, Pack = 1)]
+ struct SignalHandlerRange
+ {
+ public int IsActive;
+ public nuint RangeAddress;
+ public nuint RangeEndAddress;
+ public IntPtr ActionPointer;
+ }
+
+ [StructLayout(LayoutKind.Sequential, Pack = 1)]
+ struct SignalHandlerConfig
+ {
+ /// <summary>
+ /// The byte offset of the faulting address in the SigInfo or ExceptionRecord struct.
+ /// </summary>
+ public int StructAddressOffset;
+
+ /// <summary>
+ /// The byte offset of the write flag in the SigInfo or ExceptionRecord struct.
+ /// </summary>
+ public int StructWriteOffset;
+
+ /// <summary>
+ /// The sigaction handler that was registered before this one. (Unix only)
+ /// </summary>
+ public nuint UnixOldSigaction;
+
+ /// <summary>
+ /// The type of the previous sigaction. True for the 3-argument variant. (Unix only)
+ /// </summary>
+ public int UnixOldSigaction3Arg;
+
+ public SignalHandlerRange Range0;
+ public SignalHandlerRange Range1;
+ public SignalHandlerRange Range2;
+ public SignalHandlerRange Range3;
+ public SignalHandlerRange Range4;
+ public SignalHandlerRange Range5;
+ public SignalHandlerRange Range6;
+ public SignalHandlerRange Range7;
+ }
+
+ public static class NativeSignalHandler
+ {
+ private delegate void UnixExceptionHandler(int sig, IntPtr info, IntPtr ucontext);
+ [UnmanagedFunctionPointer(CallingConvention.Winapi)]
+ private delegate int VectoredExceptionHandler(IntPtr exceptionInfo);
+
+ private const int MaxTrackedRanges = 8;
+
+ private const int StructAddressOffset = 0;
+ private const int StructWriteOffset = 4;
+ private const int UnixOldSigaction = 8;
+ private const int UnixOldSigaction3Arg = 16;
+ private const int RangeOffset = 20;
+
+ private const int EXCEPTION_CONTINUE_SEARCH = 0;
+ private const int EXCEPTION_CONTINUE_EXECUTION = -1;
+
+ private const uint EXCEPTION_ACCESS_VIOLATION = 0xc0000005;
+
+ private static ulong _pageSize;
+ private static ulong _pageMask;
+
+ private static IntPtr _handlerConfig;
+ private static IntPtr _signalHandlerPtr;
+ private static IntPtr _signalHandlerHandle;
+
+ private static readonly object _lock = new object();
+ private static bool _initialized;
+
+ static NativeSignalHandler()
+ {
+ _handlerConfig = Marshal.AllocHGlobal(Unsafe.SizeOf<SignalHandlerConfig>());
+ ref SignalHandlerConfig config = ref GetConfigRef();
+
+ config = new SignalHandlerConfig();
+ }
+
+ public static void Initialize(IJitMemoryAllocator allocator)
+ {
+ JitCache.Initialize(allocator);
+ }
+
+ public static void InitializeSignalHandler(ulong pageSize, Func<IntPtr, IntPtr, IntPtr> customSignalHandlerFactory = null)
+ {
+ if (_initialized) return;
+
+ lock (_lock)
+ {
+ if (_initialized) return;
+
+ _pageSize = pageSize;
+ _pageMask = pageSize - 1;
+
+ ref SignalHandlerConfig config = ref GetConfigRef();
+
+ if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS())
+ {
+ _signalHandlerPtr = Marshal.GetFunctionPointerForDelegate(GenerateUnixSignalHandler(_handlerConfig));
+
+ if (customSignalHandlerFactory != null)
+ {
+ _signalHandlerPtr = customSignalHandlerFactory(UnixSignalHandlerRegistration.GetSegfaultExceptionHandler().sa_handler, _signalHandlerPtr);
+ }
+
+ var old = UnixSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
+
+ config.UnixOldSigaction = (nuint)(ulong)old.sa_handler;
+ config.UnixOldSigaction3Arg = old.sa_flags & 4;
+ }
+ else
+ {
+ config.StructAddressOffset = 40; // ExceptionInformation1
+ config.StructWriteOffset = 32; // ExceptionInformation0
+
+ _signalHandlerPtr = Marshal.GetFunctionPointerForDelegate(GenerateWindowsSignalHandler(_handlerConfig));
+
+ if (customSignalHandlerFactory != null)
+ {
+ _signalHandlerPtr = customSignalHandlerFactory(IntPtr.Zero, _signalHandlerPtr);
+ }
+
+ _signalHandlerHandle = WindowsSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
+ }
+
+ _initialized = true;
+ }
+ }
+
+ private static unsafe ref SignalHandlerConfig GetConfigRef()
+ {
+ return ref Unsafe.AsRef<SignalHandlerConfig>((void*)_handlerConfig);
+ }
+
+ public static unsafe bool AddTrackedRegion(nuint address, nuint endAddress, IntPtr action)
+ {
+ var ranges = &((SignalHandlerConfig*)_handlerConfig)->Range0;
+
+ for (int i = 0; i < MaxTrackedRanges; i++)
+ {
+ if (ranges[i].IsActive == 0)
+ {
+ ranges[i].RangeAddress = address;
+ ranges[i].RangeEndAddress = endAddress;
+ ranges[i].ActionPointer = action;
+ ranges[i].IsActive = 1;
+
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ public static unsafe bool RemoveTrackedRegion(nuint address)
+ {
+ var ranges = &((SignalHandlerConfig*)_handlerConfig)->Range0;
+
+ for (int i = 0; i < MaxTrackedRanges; i++)
+ {
+ if (ranges[i].IsActive == 1 && ranges[i].RangeAddress == address)
+ {
+ ranges[i].IsActive = 0;
+
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ private static Operand EmitGenericRegionCheck(EmitterContext context, IntPtr signalStructPtr, Operand faultAddress, Operand isWrite)
+ {
+ Operand inRegionLocal = context.AllocateLocal(OperandType.I32);
+ context.Copy(inRegionLocal, Const(0));
+
+ Operand endLabel = Label();
+
+ for (int i = 0; i < MaxTrackedRanges; i++)
+ {
+ ulong rangeBaseOffset = (ulong)(RangeOffset + i * Unsafe.SizeOf<SignalHandlerRange>());
+
+ Operand nextLabel = Label();
+
+ Operand isActive = context.Load(OperandType.I32, Const((ulong)signalStructPtr + rangeBaseOffset));
+
+ context.BranchIfFalse(nextLabel, isActive);
+
+ Operand rangeAddress = context.Load(OperandType.I64, Const((ulong)signalStructPtr + rangeBaseOffset + 4));
+ Operand rangeEndAddress = context.Load(OperandType.I64, Const((ulong)signalStructPtr + rangeBaseOffset + 12));
+
+ // Is the fault address within this tracked region?
+ Operand inRange = context.BitwiseAnd(
+ context.ICompare(faultAddress, rangeAddress, Comparison.GreaterOrEqualUI),
+ context.ICompare(faultAddress, rangeEndAddress, Comparison.LessUI)
+ );
+
+ // Only call tracking if in range.
+ context.BranchIfFalse(nextLabel, inRange, BasicBlockFrequency.Cold);
+
+ Operand offset = context.BitwiseAnd(context.Subtract(faultAddress, rangeAddress), Const(~_pageMask));
+
+ // Call the tracking action with the fault address's offset relative to the region base address.
+ Operand trackingActionPtr = context.Load(OperandType.I64, Const((ulong)signalStructPtr + rangeBaseOffset + 20));
+
+ context.Copy(inRegionLocal, Const(0));
+
+ Operand skipActionLabel = Label();
+
+ // The tracking action must be non-null to be called; otherwise, assume it returned false.
+ context.BranchIfFalse(skipActionLabel, trackingActionPtr);
+ Operand result = context.Call(trackingActionPtr, OperandType.I32, offset, Const(_pageSize), isWrite);
+ context.Copy(inRegionLocal, result);
+
+ context.MarkLabel(skipActionLabel);
+
+ // If the tracking action returns false or does not exist, it might be an invalid access due to a partial overlap on Windows.
+ if (OperatingSystem.IsWindows())
+ {
+ context.BranchIfTrue(endLabel, inRegionLocal);
+
+ context.Copy(inRegionLocal, WindowsPartialUnmapHandler.EmitRetryFromAccessViolation(context));
+ }
+
+ context.Branch(endLabel);
+
+ context.MarkLabel(nextLabel);
+ }
+
+ context.MarkLabel(endLabel);
+
+ return context.Copy(inRegionLocal);
+ }
+
+ private static Operand GenerateUnixFaultAddress(EmitterContext context, Operand sigInfoPtr)
+ {
+ ulong structAddressOffset = OperatingSystem.IsMacOS() ? 24ul : 16ul; // si_addr
+ return context.Load(OperandType.I64, context.Add(sigInfoPtr, Const(structAddressOffset)));
+ }
+
+ private static Operand GenerateUnixWriteFlag(EmitterContext context, Operand ucontextPtr)
+ {
+ if (OperatingSystem.IsMacOS())
+ {
+ const ulong mcontextOffset = 48; // uc_mcontext
+ Operand ctxPtr = context.Load(OperandType.I64, context.Add(ucontextPtr, Const(mcontextOffset)));
+
+ if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+ {
+ const ulong esrOffset = 8; // __es.__esr
+ Operand esr = context.Load(OperandType.I64, context.Add(ctxPtr, Const(esrOffset)));
+ return context.BitwiseAnd(esr, Const(0x40ul));
+ }
+
+ if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
+ {
+ const ulong errOffset = 4; // __es.__err
+ Operand err = context.Load(OperandType.I64, context.Add(ctxPtr, Const(errOffset)));
+ return context.BitwiseAnd(err, Const(2ul));
+ }
+ }
+ else if (OperatingSystem.IsLinux())
+ {
+ if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+ {
+ Operand auxPtr = context.AllocateLocal(OperandType.I64);
+
+ Operand loopLabel = Label();
+ Operand successLabel = Label();
+
+ const ulong auxOffset = 464; // uc_mcontext.__reserved
+ const uint esrMagic = 0x45535201;
+
+ context.Copy(auxPtr, context.Add(ucontextPtr, Const(auxOffset)));
+
+ context.MarkLabel(loopLabel);
+
+ // _aarch64_ctx::magic
+ Operand magic = context.Load(OperandType.I32, auxPtr);
+ // _aarch64_ctx::size
+ Operand size = context.Load(OperandType.I32, context.Add(auxPtr, Const(4ul)));
+
+ context.BranchIf(successLabel, magic, Const(esrMagic), Comparison.Equal);
+
+ context.Copy(auxPtr, context.Add(auxPtr, context.ZeroExtend32(OperandType.I64, size)));
+
+ context.Branch(loopLabel);
+
+ context.MarkLabel(successLabel);
+
+ // esr_context::esr
+ Operand esr = context.Load(OperandType.I64, context.Add(auxPtr, Const(8ul)));
+ return context.BitwiseAnd(esr, Const(0x40ul));
+ }
+
+ if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
+ {
+ const int errOffset = 192; // uc_mcontext.gregs[REG_ERR]
+ Operand err = context.Load(OperandType.I64, context.Add(ucontextPtr, Const(errOffset)));
+ return context.BitwiseAnd(err, Const(2ul));
+ }
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ private static UnixExceptionHandler GenerateUnixSignalHandler(IntPtr signalStructPtr)
+ {
+ EmitterContext context = new EmitterContext();
+
+ // (int sig, SigInfo* sigInfo, void* ucontext)
+ Operand sigInfoPtr = context.LoadArgument(OperandType.I64, 1);
+ Operand ucontextPtr = context.LoadArgument(OperandType.I64, 2);
+
+ Operand faultAddress = GenerateUnixFaultAddress(context, sigInfoPtr);
+ Operand writeFlag = GenerateUnixWriteFlag(context, ucontextPtr);
+
+ Operand isWrite = context.ICompareNotEqual(writeFlag, Const(0L)); // Normalize to 0/1.
+
+ Operand isInRegion = EmitGenericRegionCheck(context, signalStructPtr, faultAddress, isWrite);
+
+ Operand endLabel = Label();
+
+ context.BranchIfTrue(endLabel, isInRegion);
+
+ Operand unixOldSigaction = context.Load(OperandType.I64, Const((ulong)signalStructPtr + UnixOldSigaction));
+ Operand unixOldSigaction3Arg = context.Load(OperandType.I64, Const((ulong)signalStructPtr + UnixOldSigaction3Arg));
+ Operand threeArgLabel = Label();
+
+ context.BranchIfTrue(threeArgLabel, unixOldSigaction3Arg);
+
+ context.Call(unixOldSigaction, OperandType.None, context.LoadArgument(OperandType.I32, 0));
+ context.Branch(endLabel);
+
+ context.MarkLabel(threeArgLabel);
+
+ context.Call(unixOldSigaction,
+ OperandType.None,
+ context.LoadArgument(OperandType.I32, 0),
+ sigInfoPtr,
+ context.LoadArgument(OperandType.I64, 2)
+ );
+
+ context.MarkLabel(endLabel);
+
+ context.Return();
+
+ ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+ OperandType[] argTypes = new OperandType[] { OperandType.I32, OperandType.I64, OperandType.I64 };
+
+ return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<UnixExceptionHandler>();
+ }
+
+ private static VectoredExceptionHandler GenerateWindowsSignalHandler(IntPtr signalStructPtr)
+ {
+ EmitterContext context = new EmitterContext();
+
+ // (ExceptionPointers* exceptionInfo)
+ Operand exceptionInfoPtr = context.LoadArgument(OperandType.I64, 0);
+ Operand exceptionRecordPtr = context.Load(OperandType.I64, exceptionInfoPtr);
+
+ // First things first: this catches a number of exceptions, but we only want access violations.
+ Operand validExceptionLabel = Label();
+
+ Operand exceptionCode = context.Load(OperandType.I32, exceptionRecordPtr);
+
+ context.BranchIf(validExceptionLabel, exceptionCode, Const(EXCEPTION_ACCESS_VIOLATION), Comparison.Equal);
+
+ context.Return(Const(EXCEPTION_CONTINUE_SEARCH)); // Don't handle this one.
+
+ context.MarkLabel(validExceptionLabel);
+
+ // Next, read the address of the invalid access, and whether it is a write or not.
+
+ Operand structAddressOffset = context.Load(OperandType.I32, Const((ulong)signalStructPtr + StructAddressOffset));
+ Operand structWriteOffset = context.Load(OperandType.I32, Const((ulong)signalStructPtr + StructWriteOffset));
+
+ Operand faultAddress = context.Load(OperandType.I64, context.Add(exceptionRecordPtr, context.ZeroExtend32(OperandType.I64, structAddressOffset)));
+ Operand writeFlag = context.Load(OperandType.I64, context.Add(exceptionRecordPtr, context.ZeroExtend32(OperandType.I64, structWriteOffset)));
+
+ Operand isWrite = context.ICompareNotEqual(writeFlag, Const(0L)); // Normalize to 0/1.
+
+ Operand isInRegion = EmitGenericRegionCheck(context, signalStructPtr, faultAddress, isWrite);
+
+ Operand endLabel = Label();
+
+ // If the region check result is false, then run the next vectored exception handler.
+
+ context.BranchIfTrue(endLabel, isInRegion);
+
+ context.Return(Const(EXCEPTION_CONTINUE_SEARCH));
+
+ context.MarkLabel(endLabel);
+
+ // Otherwise, return to execution.
+
+ context.Return(Const(EXCEPTION_CONTINUE_EXECUTION));
+
+ // Compile and return the function.
+
+ ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+ OperandType[] argTypes = new OperandType[] { OperandType.I64 };
+
+ return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<VectoredExceptionHandler>();
+ }
+ }
+}
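
EmitGenericRegionCheck builds IR rather than running managed code because the result is compiled to a standalone native handler that must be safe to execute inside a signal context. Ignoring the Windows partial-unmap retry path, the emitted logic corresponds roughly to this managed sketch, where InvokeTrackingAction is a hypothetical stand-in for the indirect call through ActionPointer:

    // Rough managed equivalent of EmitGenericRegionCheck (illustrative only).
    static bool IsInTrackedRegion(Span<SignalHandlerRange> ranges, nuint fault, bool isWrite, ulong pageSize, ulong pageMask)
    {
        foreach (ref SignalHandlerRange range in ranges)
        {
            if (range.IsActive == 0 || fault < range.RangeAddress || fault >= range.RangeEndAddress)
            {
                continue; // Inactive slot, or fault outside this range.
            }

            // Page-align the fault's offset relative to the region base.
            ulong offset = (ulong)(fault - range.RangeAddress) & ~pageMask;

            // A null action pointer means "not handled" (assume a false return).
            return range.ActionPointer != IntPtr.Zero
                && InvokeTrackingAction(range.ActionPointer, offset, pageSize, isWrite);
        }

        return false;
    }
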
diff --git a/src/ARMeilleure/Signal/TestMethods.cs b/src/ARMeilleure/Signal/TestMethods.cs
new file mode 100644
index 00000000..e2ecad24
--- /dev/null
+++ b/src/ARMeilleure/Signal/TestMethods.cs
@@ -0,0 +1,84 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Runtime.InteropServices;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Signal
+{
+ public struct NativeWriteLoopState
+ {
+ public int Running;
+ public int Error;
+ }
+
+ public static class TestMethods
+ {
+ public delegate bool DebugPartialUnmap();
+ public delegate int DebugThreadLocalMapGetOrReserve(int threadId, int initialState);
+ public delegate void DebugNativeWriteLoop(IntPtr nativeWriteLoopPtr, IntPtr writePtr);
+
+ public static DebugPartialUnmap GenerateDebugPartialUnmap()
+ {
+ EmitterContext context = new EmitterContext();
+
+ var result = WindowsPartialUnmapHandler.EmitRetryFromAccessViolation(context);
+
+ context.Return(result);
+
+ // Compile and return the function.
+
+ ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+ OperandType[] argTypes = new OperandType[] { OperandType.I64 };
+
+ return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugPartialUnmap>();
+ }
+
+ public static DebugThreadLocalMapGetOrReserve GenerateDebugThreadLocalMapGetOrReserve(IntPtr structPtr)
+ {
+ EmitterContext context = new EmitterContext();
+
+ var result = WindowsPartialUnmapHandler.EmitThreadLocalMapIntGetOrReserve(context, structPtr, context.LoadArgument(OperandType.I32, 0), context.LoadArgument(OperandType.I32, 1));
+
+ context.Return(result);
+
+ // Compile and return the function.
+
+ ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+ OperandType[] argTypes = new OperandType[] { OperandType.I64 };
+
+ return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugThreadLocalMapGetOrReserve>();
+ }
+
+ public static DebugNativeWriteLoop GenerateDebugNativeWriteLoop()
+ {
+ EmitterContext context = new EmitterContext();
+
+ // Loop a write to the target address until "running" is false.
+
+ Operand structPtr = context.Copy(context.LoadArgument(OperandType.I64, 0));
+ Operand writePtr = context.Copy(context.LoadArgument(OperandType.I64, 1));
+
+ Operand loopLabel = Label();
+ context.MarkLabel(loopLabel);
+
+ context.Store(writePtr, Const(12345));
+
+ Operand running = context.Load(OperandType.I32, structPtr);
+
+ context.BranchIfTrue(loopLabel, running);
+
+ context.Return();
+
+ // Compile and return the function.
+
+ ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+ OperandType[] argTypes = new OperandType[] { OperandType.I64 };
+
+ return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugNativeWriteLoop>();
+ }
+ }
+}
diff --git a/src/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs b/src/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs
new file mode 100644
index 00000000..22009240
--- /dev/null
+++ b/src/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs
@@ -0,0 +1,83 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Signal
+{
+ static partial class UnixSignalHandlerRegistration
+ {
+ [StructLayout(LayoutKind.Sequential, Pack = 1)]
+ public unsafe struct SigSet
+ {
+ fixed long sa_mask[16];
+ }
+
+ [StructLayout(LayoutKind.Sequential, Pack = 1)]
+ public struct SigAction
+ {
+ public IntPtr sa_handler;
+ public SigSet sa_mask;
+ public int sa_flags;
+ public IntPtr sa_restorer;
+ }
+
+ private const int SIGSEGV = 11;
+ private const int SIGBUS = 10;
+ private const int SA_SIGINFO = 0x00000004;
+
+ [LibraryImport("libc", SetLastError = true)]
+ private static partial int sigaction(int signum, ref SigAction sigAction, out SigAction oldAction);
+
+ [LibraryImport("libc", SetLastError = true)]
+ private static partial int sigaction(int signum, IntPtr sigAction, out SigAction oldAction);
+
+ [LibraryImport("libc", SetLastError = true)]
+ private static partial int sigemptyset(ref SigSet set);
+
+ public static SigAction GetSegfaultExceptionHandler()
+ {
+ int result = sigaction(SIGSEGV, IntPtr.Zero, out SigAction old);
+
+ if (result != 0)
+ {
+ throw new InvalidOperationException($"Could not get SIGSEGV sigaction. Error: {result}");
+ }
+
+ return old;
+ }
+
+ public static SigAction RegisterExceptionHandler(IntPtr action)
+ {
+ SigAction sig = new SigAction
+ {
+ sa_handler = action,
+ sa_flags = SA_SIGINFO
+ };
+
+ sigemptyset(ref sig.sa_mask);
+
+ int result = sigaction(SIGSEGV, ref sig, out SigAction old);
+
+ if (result != 0)
+ {
+ throw new InvalidOperationException($"Could not register SIGSEGV sigaction. Error: {result}");
+ }
+
+ if (OperatingSystem.IsMacOS())
+ {
+ result = sigaction(SIGBUS, ref sig, out _);
+
+ if (result != 0)
+ {
+ throw new InvalidOperationException($"Could not register SIGBUS sigaction. Error: {result}");
+ }
+ }
+
+ return old;
+ }
+
+ public static bool RestoreExceptionHandler(SigAction oldAction)
+ {
+ return sigaction(SIGSEGV, ref oldAction, out SigAction _) == 0 && (!OperatingSystem.IsMacOS() || sigaction(SIGBUS, ref oldAction, out SigAction _) == 0);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Signal/WindowsPartialUnmapHandler.cs b/src/ARMeilleure/Signal/WindowsPartialUnmapHandler.cs
new file mode 100644
index 00000000..941e36e5
--- /dev/null
+++ b/src/ARMeilleure/Signal/WindowsPartialUnmapHandler.cs
@@ -0,0 +1,186 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using Ryujinx.Common.Memory.PartialUnmaps;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Signal
+{
+ /// <summary>
+ /// Methods to handle signals caused by partial unmaps. See the structs for C# implementations of the methods.
+ /// </summary>
+ internal static class WindowsPartialUnmapHandler
+ {
+ public static Operand EmitRetryFromAccessViolation(EmitterContext context)
+ {
+ IntPtr partialRemapStatePtr = PartialUnmapState.GlobalState;
+ IntPtr localCountsPtr = IntPtr.Add(partialRemapStatePtr, PartialUnmapState.LocalCountsOffset);
+
+ // Get the lock first.
+ EmitNativeReaderLockAcquire(context, IntPtr.Add(partialRemapStatePtr, PartialUnmapState.PartialUnmapLockOffset));
+
+ IntPtr getCurrentThreadId = WindowsSignalHandlerRegistration.GetCurrentThreadIdFunc();
+ Operand threadId = context.Call(Const((ulong)getCurrentThreadId), OperandType.I32);
+ Operand threadIndex = EmitThreadLocalMapIntGetOrReserve(context, localCountsPtr, threadId, Const(0));
+
+ Operand endLabel = Label();
+ Operand retry = context.AllocateLocal(OperandType.I32);
+ Operand threadIndexValidLabel = Label();
+
+ context.BranchIfFalse(threadIndexValidLabel, context.ICompareEqual(threadIndex, Const(-1)));
+
+ context.Copy(retry, Const(1)); // Always retry when thread local cannot be allocated.
+
+ context.Branch(endLabel);
+
+ context.MarkLabel(threadIndexValidLabel);
+
+ Operand threadLocalPartialUnmapsPtr = EmitThreadLocalMapIntGetValuePtr(context, localCountsPtr, threadIndex);
+ Operand threadLocalPartialUnmaps = context.Load(OperandType.I32, threadLocalPartialUnmapsPtr);
+ Operand partialUnmapsCount = context.Load(OperandType.I32, Const((ulong)IntPtr.Add(partialRemapStatePtr, PartialUnmapState.PartialUnmapsCountOffset)));
+
+ context.Copy(retry, context.ICompareNotEqual(threadLocalPartialUnmaps, partialUnmapsCount));
+
+ Operand noRetryLabel = Label();
+
+ context.BranchIfFalse(noRetryLabel, retry);
+
+ // if (retry) {
+
+ context.Store(threadLocalPartialUnmapsPtr, partialUnmapsCount);
+
+ context.Branch(endLabel);
+
+ context.MarkLabel(noRetryLabel);
+
+ // }
+
+ context.MarkLabel(endLabel);
+
+ // Finally, release the lock and return the retry value.
+ EmitNativeReaderLockRelease(context, IntPtr.Add(partialRemapStatePtr, PartialUnmapState.PartialUnmapLockOffset));
+
+ return retry;
+ }
+
+ public static Operand EmitThreadLocalMapIntGetOrReserve(EmitterContext context, IntPtr threadLocalMapPtr, Operand threadId, Operand initialState)
+ {
+ Operand idsPtr = Const((ulong)IntPtr.Add(threadLocalMapPtr, ThreadLocalMap<int>.ThreadIdsOffset));
+
+ Operand i = context.AllocateLocal(OperandType.I32);
+
+ context.Copy(i, Const(0));
+
+ // (Loop 1) Check all slots for a matching Thread ID (while also trying to allocate)
+
+ Operand endLabel = Label();
+
+ Operand loopLabel = Label();
+ context.MarkLabel(loopLabel);
+
+ Operand offset = context.Multiply(i, Const(sizeof(int)));
+ Operand idPtr = context.Add(idsPtr, context.SignExtend32(OperandType.I64, offset));
+
+ // Check that this slot has the thread ID.
+ Operand existingId = context.CompareAndSwap(idPtr, threadId, threadId);
+
+ // If it was already the thread ID, then we just need to return i.
+ context.BranchIfTrue(endLabel, context.ICompareEqual(existingId, threadId));
+
+ context.Copy(i, context.Add(i, Const(1)));
+
+ context.BranchIfTrue(loopLabel, context.ICompareLess(i, Const(ThreadLocalMap<int>.MapSize)));
+
+ // (Loop 2) Try to take a slot that is 0 with our Thread ID.
+
+ context.Copy(i, Const(0)); // Reset i.
+
+ Operand loop2Label = Label();
+ context.MarkLabel(loop2Label);
+
+ Operand offset2 = context.Multiply(i, Const(sizeof(int)));
+ Operand idPtr2 = context.Add(idsPtr, context.SignExtend32(OperandType.I64, offset2));
+
+ // Try to swap in the thread ID in place of 0.
+ Operand existingId2 = context.CompareAndSwap(idPtr2, Const(0), threadId);
+
+ Operand idNot0Label = Label();
+
+ // If it was 0, then we need to initialize the struct entry and return i.
+ context.BranchIfFalse(idNot0Label, context.ICompareEqual(existingId2, Const(0)));
+
+ Operand structsPtr = Const((ulong)IntPtr.Add(threadLocalMapPtr, ThreadLocalMap<int>.StructsOffset));
+ Operand structPtr = context.Add(structsPtr, context.SignExtend32(OperandType.I64, offset2));
+ context.Store(structPtr, initialState);
+
+ context.Branch(endLabel);
+
+ context.MarkLabel(idNot0Label);
+
+ context.Copy(i, context.Add(i, Const(1)));
+
+ context.BranchIfTrue(loop2Label, context.ICompareLess(i, Const(ThreadLocalMap<int>.MapSize)));
+
+ context.Copy(i, Const(-1)); // Could not place the thread in the list.
+
+ context.MarkLabel(endLabel);
+
+ return context.Copy(i);
+ }
+
+ private static Operand EmitThreadLocalMapIntGetValuePtr(EmitterContext context, IntPtr threadLocalMapPtr, Operand index)
+ {
+ Operand offset = context.Multiply(index, Const(sizeof(int)));
+ Operand structsPtr = Const((ulong)IntPtr.Add(threadLocalMapPtr, ThreadLocalMap<int>.StructsOffset));
+
+ return context.Add(structsPtr, context.SignExtend32(OperandType.I64, offset));
+ }
+
+ private static void EmitThreadLocalMapIntRelease(EmitterContext context, IntPtr threadLocalMapPtr, Operand threadId, Operand index)
+ {
+ Operand offset = context.Multiply(index, Const(sizeof(int)));
+ Operand idsPtr = Const((ulong)IntPtr.Add(threadLocalMapPtr, ThreadLocalMap<int>.ThreadIdsOffset));
+ Operand idPtr = context.Add(idsPtr, context.SignExtend32(OperandType.I64, offset));
+
+ context.CompareAndSwap(idPtr, threadId, Const(0));
+ }
+
+ private static void EmitAtomicAddI32(EmitterContext context, Operand ptr, Operand additive)
+ {
+ Operand loop = Label();
+ context.MarkLabel(loop);
+
+ Operand initial = context.Load(OperandType.I32, ptr);
+ Operand newValue = context.Add(initial, additive);
+
+ Operand replaced = context.CompareAndSwap(ptr, initial, newValue);
+
+ context.BranchIfFalse(loop, context.ICompareEqual(initial, replaced));
+ }
+
+ private static void EmitNativeReaderLockAcquire(EmitterContext context, IntPtr nativeReaderLockPtr)
+ {
+ Operand writeLockPtr = Const((ulong)IntPtr.Add(nativeReaderLockPtr, NativeReaderWriterLock.WriteLockOffset));
+
+ // Spin until we can acquire the write lock.
+ Operand spinLabel = Label();
+ context.MarkLabel(spinLabel);
+
+ // Old value must be 0 to continue (we gained the write lock)
+ context.BranchIfTrue(spinLabel, context.CompareAndSwap(writeLockPtr, Const(0), Const(1)));
+
+ // Increment reader count.
+ EmitAtomicAddI32(context, Const((ulong)IntPtr.Add(nativeReaderLockPtr, NativeReaderWriterLock.ReaderCountOffset)), Const(1));
+
+ // Release write lock.
+ context.CompareAndSwap(writeLockPtr, Const(1), Const(0));
+ }
+
+ private static void EmitNativeReaderLockRelease(EmitterContext context, IntPtr nativeReaderLockPtr)
+ {
+ // Decrement reader count.
+ EmitAtomicAddI32(context, Const((ulong)IntPtr.Add(nativeReaderLockPtr, NativeReaderWriterLock.ReaderCountOffset)), Const(-1));
+ }
+ }
+}
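
The acquire/release pair above implements a minimal reader-writer scheme: the write lock is held only long enough to bump the reader count, so readers never block each other. A managed equivalent, for illustration:

    using System.Threading;

    // Managed sketch of EmitNativeReaderLockAcquire/Release (illustrative).
    static void AcquireReaderLock(ref int writeLock, ref int readerCount)
    {
        // Spin until the write lock is free, then take it (0 -> 1).
        while (Interlocked.CompareExchange(ref writeLock, 1, 0) != 0)
        {
        }

        Interlocked.Increment(ref readerCount); // Register as a reader.
        Interlocked.Exchange(ref writeLock, 0); // Release the write lock.
    }

    static void ReleaseReaderLock(ref int readerCount)
    {
        Interlocked.Decrement(ref readerCount);
    }
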
diff --git a/src/ARMeilleure/Signal/WindowsSignalHandlerRegistration.cs b/src/ARMeilleure/Signal/WindowsSignalHandlerRegistration.cs
new file mode 100644
index 00000000..3219e015
--- /dev/null
+++ b/src/ARMeilleure/Signal/WindowsSignalHandlerRegistration.cs
@@ -0,0 +1,44 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Signal
+{
+ unsafe partial class WindowsSignalHandlerRegistration
+ {
+ [LibraryImport("kernel32.dll")]
+ private static partial IntPtr AddVectoredExceptionHandler(uint first, IntPtr handler);
+
+ [LibraryImport("kernel32.dll")]
+ private static partial ulong RemoveVectoredExceptionHandler(IntPtr handle);
+
+ [LibraryImport("kernel32.dll", SetLastError = true, EntryPoint = "LoadLibraryA")]
+ private static partial IntPtr LoadLibrary([MarshalAs(UnmanagedType.LPStr)] string lpFileName);
+
+ [LibraryImport("kernel32.dll", SetLastError = true)]
+ private static partial IntPtr GetProcAddress(IntPtr hModule, [MarshalAs(UnmanagedType.LPStr)] string procName);
+
+ private static IntPtr _getCurrentThreadIdPtr;
+
+ public static IntPtr RegisterExceptionHandler(IntPtr action)
+ {
+ return AddVectoredExceptionHandler(1, action);
+ }
+
+ public static bool RemoveExceptionHandler(IntPtr handle)
+ {
+ return RemoveVectoredExceptionHandler(handle) != 0;
+ }
+
+ public static IntPtr GetCurrentThreadIdFunc()
+ {
+ if (_getCurrentThreadIdPtr == IntPtr.Zero)
+ {
+ IntPtr handle = LoadLibrary("kernel32.dll");
+
+ _getCurrentThreadIdPtr = GetProcAddress(handle, "GetCurrentThreadId");
+ }
+
+ return _getCurrentThreadIdPtr;
+ }
+ }
+}
diff --git a/src/ARMeilleure/State/Aarch32Mode.cs b/src/ARMeilleure/State/Aarch32Mode.cs
new file mode 100644
index 00000000..395e288a
--- /dev/null
+++ b/src/ARMeilleure/State/Aarch32Mode.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.State
+{
+ enum Aarch32Mode
+ {
+ User = 0b10000,
+ Fiq = 0b10001,
+ Irq = 0b10010,
+ Supervisor = 0b10011,
+ Monitor = 0b10110,
+ Abort = 0b10111,
+ Hypervisor = 0b11010,
+ Undefined = 0b11011,
+ System = 0b11111
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/State/ExceptionCallback.cs b/src/ARMeilleure/State/ExceptionCallback.cs
new file mode 100644
index 00000000..38d6eef7
--- /dev/null
+++ b/src/ARMeilleure/State/ExceptionCallback.cs
@@ -0,0 +1,5 @@
+namespace ARMeilleure.State
+{
+ public delegate void ExceptionCallbackNoArgs(ExecutionContext context);
+ public delegate void ExceptionCallback(ExecutionContext context, ulong address, int id);
+} \ No newline at end of file
diff --git a/src/ARMeilleure/State/ExecutionContext.cs b/src/ARMeilleure/State/ExecutionContext.cs
new file mode 100644
index 00000000..859fb3a5
--- /dev/null
+++ b/src/ARMeilleure/State/ExecutionContext.cs
@@ -0,0 +1,173 @@
+using ARMeilleure.Memory;
+using System;
+
+namespace ARMeilleure.State
+{
+ public class ExecutionContext
+ {
+ private const int MinCountForCheck = 4000;
+
+ private NativeContext _nativeContext;
+
+ internal IntPtr NativeContextPtr => _nativeContext.BasePtr;
+
+ private bool _interrupted;
+
+ private readonly ICounter _counter;
+
+ public ulong Pc => _nativeContext.GetPc();
+
+ public uint CtrEl0 => 0x8444c004;
+ public uint DczidEl0 => 0x00000004;
+
+ public ulong CntfrqEl0 => _counter.Frequency;
+ public ulong CntpctEl0 => _counter.Counter;
+
+ // CNTVCT_EL0 = CNTPCT_EL0 - CNTVOFF_EL2
+ // Since EL2 isn't implemented, CNTVOFF_EL2 = 0
+ public ulong CntvctEl0 => CntpctEl0;
+
+ public long TpidrEl0
+ {
+ get => _nativeContext.GetTpidrEl0();
+ set => _nativeContext.SetTpidrEl0(value);
+ }
+
+ public long TpidrroEl0
+ {
+ get => _nativeContext.GetTpidrroEl0();
+ set => _nativeContext.SetTpidrroEl0(value);
+ }
+
+ public uint Pstate
+ {
+ get => _nativeContext.GetPstate();
+ set => _nativeContext.SetPstate(value);
+ }
+
+ public FPSR Fpsr
+ {
+ get => (FPSR)_nativeContext.GetFPState((uint)FPSR.Mask);
+ set => _nativeContext.SetFPState((uint)value, (uint)FPSR.Mask);
+ }
+
+ public FPCR Fpcr
+ {
+ get => (FPCR)_nativeContext.GetFPState((uint)FPCR.Mask);
+ set => _nativeContext.SetFPState((uint)value, (uint)FPCR.Mask);
+ }
+ public FPCR StandardFpcrValue => (Fpcr & (FPCR.Ahp)) | FPCR.Dn | FPCR.Fz;
+
+ public FPSCR Fpscr
+ {
+ get => (FPSCR)_nativeContext.GetFPState((uint)FPSCR.Mask);
+ set => _nativeContext.SetFPState((uint)value, (uint)FPSCR.Mask);
+ }
+
+ public bool IsAarch32 { get; set; }
+
+ internal ExecutionMode ExecutionMode
+ {
+ get
+ {
+ if (IsAarch32)
+ {
+ return GetPstateFlag(PState.TFlag)
+ ? ExecutionMode.Aarch32Thumb
+ : ExecutionMode.Aarch32Arm;
+ }
+ else
+ {
+ return ExecutionMode.Aarch64;
+ }
+ }
+ }
+
+ public bool Running
+ {
+ get => _nativeContext.GetRunning();
+ private set => _nativeContext.SetRunning(value);
+ }
+
+ private readonly ExceptionCallbackNoArgs _interruptCallback;
+ private readonly ExceptionCallback _breakCallback;
+ private readonly ExceptionCallback _supervisorCallback;
+ private readonly ExceptionCallback _undefinedCallback;
+
+ public ExecutionContext(
+ IJitMemoryAllocator allocator,
+ ICounter counter,
+ ExceptionCallbackNoArgs interruptCallback = null,
+ ExceptionCallback breakCallback = null,
+ ExceptionCallback supervisorCallback = null,
+ ExceptionCallback undefinedCallback = null)
+ {
+ _nativeContext = new NativeContext(allocator);
+ _counter = counter;
+ _interruptCallback = interruptCallback;
+ _breakCallback = breakCallback;
+ _supervisorCallback = supervisorCallback;
+ _undefinedCallback = undefinedCallback;
+
+ Running = true;
+
+ _nativeContext.SetCounter(MinCountForCheck);
+ }
+
+ public ulong GetX(int index) => _nativeContext.GetX(index);
+ public void SetX(int index, ulong value) => _nativeContext.SetX(index, value);
+
+ public V128 GetV(int index) => _nativeContext.GetV(index);
+ public void SetV(int index, V128 value) => _nativeContext.SetV(index, value);
+
+ public bool GetPstateFlag(PState flag) => _nativeContext.GetPstateFlag(flag);
+ public void SetPstateFlag(PState flag, bool value) => _nativeContext.SetPstateFlag(flag, value);
+
+ public bool GetFPstateFlag(FPState flag) => _nativeContext.GetFPStateFlag(flag);
+ public void SetFPstateFlag(FPState flag, bool value) => _nativeContext.SetFPStateFlag(flag, value);
+
+ internal void CheckInterrupt()
+ {
+ if (_interrupted)
+ {
+ _interrupted = false;
+
+ _interruptCallback?.Invoke(this);
+ }
+
+ _nativeContext.SetCounter(MinCountForCheck);
+ }
+
+ public void RequestInterrupt()
+ {
+ _interrupted = true;
+ }
+
+ internal void OnBreak(ulong address, int imm)
+ {
+ _breakCallback?.Invoke(this, address, imm);
+ }
+
+ internal void OnSupervisorCall(ulong address, int imm)
+ {
+ _supervisorCallback?.Invoke(this, address, imm);
+ }
+
+ internal void OnUndefined(ulong address, int opCode)
+ {
+ _undefinedCallback?.Invoke(this, address, opCode);
+ }
+
+ public void StopRunning()
+ {
+ Running = false;
+
+ _nativeContext.SetCounter(0);
+ }
+
+ public void Dispose()
+ {
+ _nativeContext.Dispose();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/State/ExecutionMode.cs b/src/ARMeilleure/State/ExecutionMode.cs
new file mode 100644
index 00000000..29154a25
--- /dev/null
+++ b/src/ARMeilleure/State/ExecutionMode.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.State
+{
+ enum ExecutionMode : int
+ {
+ Aarch32Arm = 0,
+ Aarch32Thumb = 1,
+ Aarch64 = 2
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/State/FPCR.cs b/src/ARMeilleure/State/FPCR.cs
new file mode 100644
index 00000000..6f707de7
--- /dev/null
+++ b/src/ARMeilleure/State/FPCR.cs
@@ -0,0 +1,22 @@
+using System;
+
+namespace ARMeilleure.State
+{
+ [Flags]
+ public enum FPCR : uint
+ {
+ Ioe = 1u << 8,
+ Dze = 1u << 9,
+ Ofe = 1u << 10,
+ Ufe = 1u << 11,
+ Ixe = 1u << 12,
+ Ide = 1u << 15,
+ RMode0 = 1u << 22,
+ RMode1 = 1u << 23,
+ Fz = 1u << 24,
+ Dn = 1u << 25,
+ Ahp = 1u << 26,
+
+ Mask = Ahp | Dn | Fz | RMode1 | RMode0 | Ide | Ixe | Ufe | Ofe | Dze | Ioe // 0x07C09F00u
+ }
+}
diff --git a/src/ARMeilleure/State/FPException.cs b/src/ARMeilleure/State/FPException.cs
new file mode 100644
index 00000000..e24e07af
--- /dev/null
+++ b/src/ARMeilleure/State/FPException.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.State
+{
+ enum FPException
+ {
+ InvalidOp = 0,
+ DivideByZero = 1,
+ Overflow = 2,
+ Underflow = 3,
+ Inexact = 4,
+ InputDenorm = 7
+ }
+}
diff --git a/src/ARMeilleure/State/FPRoundingMode.cs b/src/ARMeilleure/State/FPRoundingMode.cs
new file mode 100644
index 00000000..8d757a15
--- /dev/null
+++ b/src/ARMeilleure/State/FPRoundingMode.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.State
+{
+ public enum FPRoundingMode
+ {
+ ToNearest = 0, // With ties to even.
+ TowardsPlusInfinity = 1,
+ TowardsMinusInfinity = 2,
+ TowardsZero = 3,
+ ToNearestAway = 4 // With ties to away.
+ }
+}
diff --git a/src/ARMeilleure/State/FPSCR.cs b/src/ARMeilleure/State/FPSCR.cs
new file mode 100644
index 00000000..d6d2fc26
--- /dev/null
+++ b/src/ARMeilleure/State/FPSCR.cs
@@ -0,0 +1,15 @@
+using System;
+
+namespace ARMeilleure.State
+{
+ [Flags]
+ public enum FPSCR : uint
+ {
+ V = 1u << 28,
+ C = 1u << 29,
+ Z = 1u << 30,
+ N = 1u << 31,
+
+ Mask = N | Z | C | V | FPSR.Mask | FPCR.Mask // 0xFFC09F9Fu
+ }
+}
diff --git a/src/ARMeilleure/State/FPSR.cs b/src/ARMeilleure/State/FPSR.cs
new file mode 100644
index 00000000..5e66d5ce
--- /dev/null
+++ b/src/ARMeilleure/State/FPSR.cs
@@ -0,0 +1,18 @@
+using System;
+
+namespace ARMeilleure.State
+{
+ [Flags]
+ public enum FPSR : uint
+ {
+ Ioc = 1u << 0,
+ Dzc = 1u << 1,
+ Ofc = 1u << 2,
+ Ufc = 1u << 3,
+ Ixc = 1u << 4,
+ Idc = 1u << 7,
+ Qc = 1u << 27,
+
+ Mask = Qc | Idc | Ixc | Ufc | Ofc | Dzc | Ioc // 0x0800009Fu
+ }
+}
diff --git a/src/ARMeilleure/State/FPState.cs b/src/ARMeilleure/State/FPState.cs
new file mode 100644
index 00000000..fa6ab9d4
--- /dev/null
+++ b/src/ARMeilleure/State/FPState.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.State
+{
+ public enum FPState
+ {
+ // FPSR Flags.
+ IocFlag = 0,
+ DzcFlag = 1,
+ OfcFlag = 2,
+ UfcFlag = 3,
+ IxcFlag = 4,
+ IdcFlag = 7,
+ QcFlag = 27,
+ VFlag = 28,
+ CFlag = 29,
+ ZFlag = 30,
+ NFlag = 31,
+
+ // FPCR Flags.
+ IoeFlag = 8,
+ DzeFlag = 9,
+ OfeFlag = 10,
+ UfeFlag = 11,
+ IxeFlag = 12,
+ IdeFlag = 15,
+ RMode0Flag = 22,
+ RMode1Flag = 23,
+ FzFlag = 24,
+ DnFlag = 25,
+ AhpFlag = 26
+ }
+}
diff --git a/src/ARMeilleure/State/FPType.cs b/src/ARMeilleure/State/FPType.cs
new file mode 100644
index 00000000..84e0db8d
--- /dev/null
+++ b/src/ARMeilleure/State/FPType.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.State
+{
+ enum FPType
+ {
+ Nonzero,
+ Zero,
+ Infinity,
+ QNaN,
+ SNaN
+ }
+}
diff --git a/src/ARMeilleure/State/ICounter.cs b/src/ARMeilleure/State/ICounter.cs
new file mode 100644
index 00000000..93e721ea
--- /dev/null
+++ b/src/ARMeilleure/State/ICounter.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.State
+{
+ /// <summary>
+ /// CPU Counter interface.
+ /// </summary>
+ public interface ICounter
+ {
+ /// <summary>
+ /// Counter frequency in Hertz.
+ /// </summary>
+ ulong Frequency { get; }
+
+ /// <summary>
+ /// Current counter value.
+ /// </summary>
+ ulong Counter { get; }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/State/NativeContext.cs b/src/ARMeilleure/State/NativeContext.cs
new file mode 100644
index 00000000..3189bdd8
--- /dev/null
+++ b/src/ARMeilleure/State/NativeContext.cs
@@ -0,0 +1,269 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure.State
+{
+ class NativeContext : IDisposable
+ {
+ private unsafe struct NativeCtxStorage
+ {
+ public fixed ulong X[RegisterConsts.IntRegsCount];
+ public fixed ulong V[RegisterConsts.VecRegsCount * 2];
+ public fixed uint Flags[RegisterConsts.FlagsCount];
+ public fixed uint FpFlags[RegisterConsts.FpFlagsCount];
+ public long TpidrEl0;
+ public long TpidrroEl0;
+ public int Counter;
+ public ulong DispatchAddress;
+ public ulong ExclusiveAddress;
+ public ulong ExclusiveValueLow;
+ public ulong ExclusiveValueHigh;
+ public int Running;
+ }
+
+ private static NativeCtxStorage _dummyStorage = new NativeCtxStorage();
+
+ private readonly IJitMemoryBlock _block;
+
+ public IntPtr BasePtr => _block.Pointer;
+
+ public NativeContext(IJitMemoryAllocator allocator)
+ {
+ _block = allocator.Allocate((ulong)Unsafe.SizeOf<NativeCtxStorage>());
+
+ GetStorage().ExclusiveAddress = ulong.MaxValue;
+ }
+
+ public ulong GetPc()
+ {
+ // TODO: More precise tracking of PC value.
+ return GetStorage().DispatchAddress;
+ }
+
+ public unsafe ulong GetX(int index)
+ {
+ if ((uint)index >= RegisterConsts.IntRegsCount)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ return GetStorage().X[index];
+ }
+
+ public unsafe void SetX(int index, ulong value)
+ {
+ if ((uint)index >= RegisterConsts.IntRegsCount)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ GetStorage().X[index] = value;
+ }
+
+ public unsafe V128 GetV(int index)
+ {
+ if ((uint)index >= RegisterConsts.VecRegsCount)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ return new V128(GetStorage().V[index * 2 + 0], GetStorage().V[index * 2 + 1]);
+ }
+
+ public unsafe void SetV(int index, V128 value)
+ {
+ if ((uint)index >= RegisterConsts.VecRegsCount)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ GetStorage().V[index * 2 + 0] = value.Extract<ulong>(0);
+ GetStorage().V[index * 2 + 1] = value.Extract<ulong>(1);
+ }
+
+ public unsafe bool GetPstateFlag(PState flag)
+ {
+ if ((uint)flag >= RegisterConsts.FlagsCount)
+ {
+ throw new ArgumentException($"Invalid flag \"{flag}\" specified.");
+ }
+
+ return GetStorage().Flags[(int)flag] != 0;
+ }
+
+ public unsafe void SetPstateFlag(PState flag, bool value)
+ {
+ if ((uint)flag >= RegisterConsts.FlagsCount)
+ {
+ throw new ArgumentException($"Invalid flag \"{flag}\" specified.");
+ }
+
+ GetStorage().Flags[(int)flag] = value ? 1u : 0u;
+ }
+
+ public unsafe uint GetPstate()
+ {
+ uint value = 0;
+ for (int flag = 0; flag < RegisterConsts.FlagsCount; flag++)
+ {
+ value |= GetStorage().Flags[flag] != 0 ? 1u << flag : 0u;
+ }
+ return value;
+ }
+
+ public unsafe void SetPstate(uint value)
+ {
+ for (int flag = 0; flag < RegisterConsts.FlagsCount; flag++)
+ {
+ uint bit = 1u << flag;
+ GetStorage().Flags[flag] = (value & bit) == bit ? 1u : 0u;
+ }
+ }
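+
+        // The flags are stored unpacked (one uint per flag) so JIT-emitted code can load
+        // and store each flag with a plain 32-bit access; GetPstate and SetPstate above
+        // are the packing/unpacking bridge to the architectural PSTATE view. For example,
+        // with only NFlag (bit 31) set, GetPstate returns 0x80000000.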
+
+ public unsafe bool GetFPStateFlag(FPState flag)
+ {
+ if ((uint)flag >= RegisterConsts.FpFlagsCount)
+ {
+ throw new ArgumentException($"Invalid flag \"{flag}\" specified.");
+ }
+
+ return GetStorage().FpFlags[(int)flag] != 0;
+ }
+
+ public unsafe void SetFPStateFlag(FPState flag, bool value)
+ {
+ if ((uint)flag >= RegisterConsts.FpFlagsCount)
+ {
+ throw new ArgumentException($"Invalid flag \"{flag}\" specified.");
+ }
+
+ GetStorage().FpFlags[(int)flag] = value ? 1u : 0u;
+ }
+
+ public unsafe uint GetFPState(uint mask = uint.MaxValue)
+ {
+ uint value = 0;
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ uint bit = 1u << flag;
+
+ if ((mask & bit) == bit)
+ {
+ value |= GetStorage().FpFlags[flag] != 0 ? bit : 0u;
+ }
+ }
+ return value;
+ }
+
+ public unsafe void SetFPState(uint value, uint mask = uint.MaxValue)
+ {
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ uint bit = 1u << flag;
+
+ if ((mask & bit) == bit)
+ {
+ GetStorage().FpFlags[flag] = (value & bit) == bit ? 1u : 0u;
+ }
+ }
+ }
+
+ public long GetTpidrEl0() => GetStorage().TpidrEl0;
+ public void SetTpidrEl0(long value) => GetStorage().TpidrEl0 = value;
+
+ public long GetTpidrroEl0() => GetStorage().TpidrroEl0;
+ public void SetTpidrroEl0(long value) => GetStorage().TpidrroEl0 = value;
+
+ public int GetCounter() => GetStorage().Counter;
+ public void SetCounter(int value) => GetStorage().Counter = value;
+
+ public bool GetRunning() => GetStorage().Running != 0;
+ public void SetRunning(bool value) => GetStorage().Running = value ? 1 : 0;
+
+ public unsafe static int GetRegisterOffset(Register reg)
+ {
+ if (reg.Type == RegisterType.Integer)
+ {
+ if ((uint)reg.Index >= RegisterConsts.IntRegsCount)
+ {
+ throw new ArgumentException("Invalid register.");
+ }
+
+ return StorageOffset(ref _dummyStorage, ref _dummyStorage.X[reg.Index]);
+ }
+ else if (reg.Type == RegisterType.Vector)
+ {
+ if ((uint)reg.Index >= RegisterConsts.VecRegsCount)
+ {
+ throw new ArgumentException("Invalid register.");
+ }
+
+ return StorageOffset(ref _dummyStorage, ref _dummyStorage.V[reg.Index * 2]);
+ }
+ else if (reg.Type == RegisterType.Flag)
+ {
+ if ((uint)reg.Index >= RegisterConsts.FlagsCount)
+ {
+ throw new ArgumentException("Invalid register.");
+ }
+
+ return StorageOffset(ref _dummyStorage, ref _dummyStorage.Flags[reg.Index]);
+ }
+ else /* if (reg.Type == RegisterType.FpFlag) */
+ {
+ if ((uint)reg.Index >= RegisterConsts.FpFlagsCount)
+ {
+ throw new ArgumentException("Invalid register.");
+ }
+
+ return StorageOffset(ref _dummyStorage, ref _dummyStorage.FpFlags[reg.Index]);
+ }
+ }
+
+ public static int GetTpidrEl0Offset()
+ {
+ return StorageOffset(ref _dummyStorage, ref _dummyStorage.TpidrEl0);
+ }
+
+ public static int GetTpidrroEl0Offset()
+ {
+ return StorageOffset(ref _dummyStorage, ref _dummyStorage.TpidrroEl0);
+ }
+
+ public static int GetCounterOffset()
+ {
+ return StorageOffset(ref _dummyStorage, ref _dummyStorage.Counter);
+ }
+
+ public static int GetDispatchAddressOffset()
+ {
+ return StorageOffset(ref _dummyStorage, ref _dummyStorage.DispatchAddress);
+ }
+
+ public static int GetExclusiveAddressOffset()
+ {
+ return StorageOffset(ref _dummyStorage, ref _dummyStorage.ExclusiveAddress);
+ }
+
+ public static int GetExclusiveValueOffset()
+ {
+ return StorageOffset(ref _dummyStorage, ref _dummyStorage.ExclusiveValueLow);
+ }
+
+ public static int GetRunningOffset()
+ {
+ return StorageOffset(ref _dummyStorage, ref _dummyStorage.Running);
+ }
+
+ private static int StorageOffset<T>(ref NativeCtxStorage storage, ref T target)
+ {
+ return (int)Unsafe.ByteOffset(ref Unsafe.As<NativeCtxStorage, T>(ref storage), ref target);
+ }
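+
+        // StorageOffset computes field offsets the way C's offsetof would: it measures the
+        // byte distance from the start of a dummy storage instance to a field within it.
+        // Illustrative sketch (not part of the original source):
+        //
+        //     int offset = StorageOffset(ref _dummyStorage, ref _dummyStorage.Counter);
+        //     // JIT-emitted code can then address the field as BasePtr + offset.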
+
+ private unsafe ref NativeCtxStorage GetStorage() => ref Unsafe.AsRef<NativeCtxStorage>((void*)_block.Pointer);
+
+ public void Dispose() => _block.Dispose();
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/State/PState.cs b/src/ARMeilleure/State/PState.cs
new file mode 100644
index 00000000..9a80bc57
--- /dev/null
+++ b/src/ARMeilleure/State/PState.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.State
+{
+ public enum PState
+ {
+ TFlag = 5,
+ EFlag = 9,
+ GE0Flag = 16,
+ GE1Flag = 17,
+ GE2Flag = 18,
+ GE3Flag = 19,
+ QFlag = 27,
+ VFlag = 28,
+ CFlag = 29,
+ ZFlag = 30,
+ NFlag = 31
+ }
+}
diff --git a/src/ARMeilleure/State/RegisterAlias.cs b/src/ARMeilleure/State/RegisterAlias.cs
new file mode 100644
index 00000000..7ebfa275
--- /dev/null
+++ b/src/ARMeilleure/State/RegisterAlias.cs
@@ -0,0 +1,42 @@
+namespace ARMeilleure.State
+{
+ static class RegisterAlias
+ {
+ public const int R8Usr = 8;
+ public const int R9Usr = 9;
+ public const int R10Usr = 10;
+ public const int R11Usr = 11;
+ public const int R12Usr = 12;
+ public const int SpUsr = 13;
+ public const int LrUsr = 14;
+
+ public const int SpHyp = 15;
+
+ public const int LrIrq = 16;
+ public const int SpIrq = 17;
+
+ public const int LrSvc = 18;
+ public const int SpSvc = 19;
+
+ public const int LrAbt = 20;
+ public const int SpAbt = 21;
+
+ public const int LrUnd = 22;
+ public const int SpUnd = 23;
+
+ public const int R8Fiq = 24;
+ public const int R9Fiq = 25;
+ public const int R10Fiq = 26;
+ public const int R11Fiq = 27;
+ public const int R12Fiq = 28;
+ public const int SpFiq = 29;
+ public const int LrFiq = 30;
+
+ public const int Aarch32Sp = 13;
+ public const int Aarch32Lr = 14;
+ public const int Aarch32Pc = 15;
+
+ public const int Lr = 30;
+ public const int Zr = 31;
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/State/RegisterConsts.cs b/src/ARMeilleure/State/RegisterConsts.cs
new file mode 100644
index 00000000..d6294080
--- /dev/null
+++ b/src/ARMeilleure/State/RegisterConsts.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.State
+{
+ static class RegisterConsts
+ {
+ public const int IntRegsCount = 32;
+ public const int VecRegsCount = 32;
+ public const int FlagsCount = 32;
+ public const int FpFlagsCount = 32;
+ public const int IntAndVecRegsCount = IntRegsCount + VecRegsCount;
+ public const int FpFlagsOffset = IntRegsCount + VecRegsCount + FlagsCount;
+ public const int TotalCount = IntRegsCount + VecRegsCount + FlagsCount + FpFlagsCount;
+
+ public const int ZeroIndex = 31;
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/State/V128.cs b/src/ARMeilleure/State/V128.cs
new file mode 100644
index 00000000..3fa9f9a9
--- /dev/null
+++ b/src/ARMeilleure/State/V128.cs
@@ -0,0 +1,312 @@
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.State
+{
+ /// <summary>
+ /// Represents a 128-bit vector.
+ /// </summary>
+ [StructLayout(LayoutKind.Sequential, Size = 16)]
+ public struct V128 : IEquatable<V128>
+ {
+ // _e0 & _e1 could be marked as readonly, however they are not readonly because we modify them through the Unsafe
+ // APIs. This also means that one should be careful when changing the layout of this struct.
+
+ private ulong _e0;
+ private ulong _e1;
+
+ /// <summary>
+ /// Gets a new <see cref="V128"/> with all bits set to zero.
+ /// </summary>
+ public static V128 Zero => new V128(0, 0);
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="double"/> value
+ /// as a scalar.
+ /// </summary>
+ /// <param name="value">Scalar value</param>
+ public V128(double value) : this(value, 0) { }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="double"/> elements.
+ /// </summary>
+ /// <param name="e0">Element 0</param>
+ /// <param name="e1">Element 1</param>
+ public V128(double e0, double e1)
+ {
+ _e0 = (ulong)BitConverter.DoubleToInt64Bits(e0);
+ _e1 = (ulong)BitConverter.DoubleToInt64Bits(e1);
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="float"/> value as a
+ /// scalar.
+ /// </summary>
+ /// <param name="value">Scalar value</param>
+ public V128(float value) : this(value, 0, 0, 0) { }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="float"/> elements.
+ /// </summary>
+ /// <param name="e0">Element 0</param>
+ /// <param name="e1">Element 1</param>
+ /// <param name="e2">Element 2</param>
+ /// <param name="e3">Element 3</param>
+ public V128(float e0, float e1, float e2, float e3)
+ {
+ _e0 = (ulong)(uint)BitConverter.SingleToInt32Bits(e0) << 0;
+ _e0 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e1) << 32;
+ _e1 = (ulong)(uint)BitConverter.SingleToInt32Bits(e2) << 0;
+ _e1 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e3) << 32;
+ }
+
+ /// <summary>
+    /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="long"/>
+    /// elements.
+ /// </summary>
+ /// <param name="e0">Element 0</param>
+ /// <param name="e1">Element 1</param>
+ public V128(long e0, long e1) : this((ulong)e0, (ulong)e1) { }
+
+ /// <summary>
+    /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="ulong"/> elements.
+ /// </summary>
+ /// <param name="e0">Element 0</param>
+ /// <param name="e1">Element 1</param>
+ public V128(ulong e0, ulong e1)
+ {
+ _e0 = e0;
+ _e1 = e1;
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="int"/> elements.
+ /// </summary>
+ /// <param name="e0">Element 0</param>
+ /// <param name="e1">Element 1</param>
+ /// <param name="e2">Element 2</param>
+ /// <param name="e3">Element 3</param>
+ public V128(int e0, int e1, int e2, int e3) : this((uint)e0, (uint)e1, (uint)e2, (uint)e3) { }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="uint"/> elements.
+ /// </summary>
+ /// <param name="e0">Element 0</param>
+ /// <param name="e1">Element 1</param>
+ /// <param name="e2">Element 2</param>
+ /// <param name="e3">Element 3</param>
+ public V128(uint e0, uint e1, uint e2, uint e3)
+ {
+ _e0 = (ulong)e0 << 0;
+ _e0 |= (ulong)e1 << 32;
+ _e1 = (ulong)e2 << 0;
+ _e1 |= (ulong)e3 << 32;
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="V128"/> struct from the specified <see cref="byte"/> array.
+ /// </summary>
+ /// <param name="data"><see cref="byte"/> array to use</param>
+ public V128(byte[] data)
+ {
+ _e0 = (ulong)BitConverter.ToInt64(data, 0);
+ _e1 = (ulong)BitConverter.ToInt64(data, 8);
+ }
+
+ /// <summary>
+ /// Returns the value of the <see cref="V128"/> as a <typeparamref name="T"/> scalar.
+ /// </summary>
+ /// <typeparam name="T">Type of scalar</typeparam>
+ /// <returns>Value of the <see cref="V128"/> as a <typeparamref name="T"/> scalar</returns>
+ /// <exception cref="ArgumentOutOfRangeException">Size of <typeparamref name="T"/> is larger than 16 bytes</exception>
+ public T As<T>() where T : unmanaged
+ {
+ return Extract<T>(0);
+ }
+
+ /// <summary>
+ /// Extracts the element at the specified index as a <typeparamref name="T"/> from the <see cref="V128"/>.
+ /// </summary>
+ /// <typeparam name="T">Element type</typeparam>
+ /// <param name="index">Index of element</param>
+ /// <returns>Element at the specified index as a <typeparamref name="T"/> from the <see cref="V128"/></returns>
+ /// <exception cref="ArgumentOutOfRangeException">
+    /// <paramref name="index"/> is out of bounds or the size of <typeparamref name="T"/> is larger than 16 bytes
+ /// </exception>
+ public T Extract<T>(int index) where T : unmanaged
+ {
+ if ((uint)index >= GetElementCount<T>())
+ ThrowIndexOutOfRange();
+
+ // Performs:
+ // return *((*T)this + index);
+ return Unsafe.Add(ref Unsafe.As<V128, T>(ref this), index);
+ }
+
+ /// <summary>
+ /// Inserts the specified value into the element at the specified index in the <see cref="V128"/>.
+ /// </summary>
+ /// <typeparam name="T">Element type</typeparam>
+ /// <param name="index">Index of element</param>
+ /// <param name="value">Value to insert</param>
+ /// <exception cref="ArgumentOutOfRangeException">
+    /// <paramref name="index"/> is out of bounds or the size of <typeparamref name="T"/> is larger than 16 bytes
+ /// </exception>
+ public void Insert<T>(int index, T value) where T : unmanaged
+ {
+ if ((uint)index >= GetElementCount<T>())
+ ThrowIndexOutOfRange();
+
+ // Performs:
+ // *((*T)this + index) = value;
+ Unsafe.Add(ref Unsafe.As<V128, T>(ref this), index) = value;
+ }
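+
+        // Usage sketch (illustrative, not part of the original source):
+        //
+        //     V128 v = V128.Zero;
+        //     v.Insert(3, 0xDEADBEEFu);    // set element 3 of the uint view
+        //     uint e = v.Extract<uint>(3); // 0xDEADBEEF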
+
+ /// <summary>
+ /// Returns a new <see cref="byte"/> array which represents the <see cref="V128"/>.
+ /// </summary>
+ /// <returns>A new <see cref="byte"/> array which represents the <see cref="V128"/></returns>
+ public byte[] ToArray()
+ {
+ byte[] data = new byte[16];
+ Span<byte> span = data;
+
+ BitConverter.TryWriteBytes(span, _e0);
+ BitConverter.TryWriteBytes(span.Slice(8), _e1);
+
+ return data;
+ }
+
+ /// <summary>
+ /// Performs a bitwise logical left shift on the specified <see cref="V128"/> by the specified shift count.
+ /// </summary>
+ /// <param name="x"><see cref="V128"/> instance</param>
+ /// <param name="shift">Number of shifts</param>
+ /// <returns>Result of left shift</returns>
+ /// <remarks>
+ /// This supports shift counts up to 63; anything above may result in unexpected behaviour.
+ /// </remarks>
+ public static V128 operator <<(V128 x, int shift)
+ {
+ if (shift == 0)
+ {
+ return new V128(x._e0, x._e1);
+ }
+
+ ulong shiftOut = x._e0 >> (64 - shift);
+
+ return new V128(x._e0 << shift, (x._e1 << shift) | shiftOut);
+ }
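+
+        // Example (illustrative): shifting new V128(0x8000000000000000UL, 0UL) left by 1
+        // carries the top bit of the low lane into the high lane, yielding new V128(0UL, 1UL).
+        // The shift == 0 early-out exists because C# masks shift counts to 0-63, so
+        // x._e0 >> (64 - 0) would behave as x._e0 >> 0 rather than producing 0.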
+
+ /// <summary>
+ /// Performs a bitwise logical right shift on the specified <see cref="V128"/> by the specified shift count.
+ /// </summary>
+ /// <param name="x"><see cref="V128"/> instance</param>
+ /// <param name="shift">Number of shifts</param>
+ /// <returns>Result of right shift</returns>
+ /// <remarks>
+ /// This supports shift counts up to 63; anything above may result in unexpected behaviour.
+ /// </remarks>
+ public static V128 operator >>(V128 x, int shift)
+ {
+ if (shift == 0)
+ {
+ return new V128(x._e0, x._e1);
+ }
+
+ ulong shiftOut = x._e1 & ((1UL << shift) - 1);
+
+ return new V128((x._e0 >> shift) | (shiftOut << (64 - shift)), x._e1 >> shift);
+ }
+
+ /// <summary>
+ /// Performs a bitwise not on the specified <see cref="V128"/>.
+ /// </summary>
+ /// <param name="x">Target <see cref="V128"/></param>
+ /// <returns>Result of not operation</returns>
+ public static V128 operator ~(V128 x) => new V128(~x._e0, ~x._e1);
+
+ /// <summary>
+ /// Performs a bitwise and on the specified <see cref="V128"/> instances.
+ /// </summary>
+ /// <param name="x">First instance</param>
+ /// <param name="y">Second instance</param>
+ /// <returns>Result of and operation</returns>
+ public static V128 operator &(V128 x, V128 y) => new V128(x._e0 & y._e0, x._e1 & y._e1);
+
+ /// <summary>
+ /// Performs a bitwise or on the specified <see cref="V128"/> instances.
+ /// </summary>
+ /// <param name="x">First instance</param>
+ /// <param name="y">Second instance</param>
+ /// <returns>Result of or operation</returns>
+ public static V128 operator |(V128 x, V128 y) => new V128(x._e0 | y._e0, x._e1 | y._e1);
+
+ /// <summary>
+        /// Performs a bitwise exclusive or on the specified <see cref="V128"/> instances.
+ /// </summary>
+ /// <param name="x">First instance</param>
+ /// <param name="y">Second instance</param>
+ /// <returns>Result of exclusive or operation</returns>
+ public static V128 operator ^(V128 x, V128 y) => new V128(x._e0 ^ y._e0, x._e1 ^ y._e1);
+
+ /// <summary>
+ /// Determines if the specified <see cref="V128"/> instances are equal.
+ /// </summary>
+ /// <param name="x">First instance</param>
+ /// <param name="y">Second instance</param>
+ /// <returns>true if equal; otherwise false</returns>
+ public static bool operator ==(V128 x, V128 y) => x.Equals(y);
+
+ /// <summary>
+ /// Determines if the specified <see cref="V128"/> instances are not equal.
+ /// </summary>
+ /// <param name="x">First instance</param>
+ /// <param name="y">Second instance</param>
+ /// <returns>true if not equal; otherwise false</returns>
+ public static bool operator !=(V128 x, V128 y) => !x.Equals(y);
+
+ /// <summary>
+ /// Determines if the specified <see cref="V128"/> is equal to this <see cref="V128"/> instance.
+ /// </summary>
+ /// <param name="other">Other <see cref="V128"/> instance</param>
+ /// <returns>true if equal; otherwise false</returns>
+ public bool Equals(V128 other)
+ {
+ return other._e0 == _e0 && other._e1 == _e1;
+ }
+
+ /// <summary>
+ /// Determines if the specified <see cref="object"/> is equal to this <see cref="V128"/> instance.
+ /// </summary>
+ /// <param name="obj">Other <see cref="object"/> instance</param>
+ /// <returns>true if equal; otherwise false</returns>
+ public override bool Equals(object obj)
+ {
+ return obj is V128 vector && Equals(vector);
+ }
+
+ /// <inheritdoc/>
+ public override int GetHashCode()
+ {
+ return HashCode.Combine(_e0, _e1);
+ }
+
+ /// <inheritdoc/>
+ public override string ToString()
+ {
+ return $"0x{_e1:X16}{_e0:X16}";
+ }
+
+ private uint GetElementCount<T>() where T : unmanaged
+ {
+ return (uint)(Unsafe.SizeOf<V128>() / Unsafe.SizeOf<T>());
+ }
+
+ private static void ThrowIndexOutOfRange()
+ {
+ throw new ArgumentOutOfRangeException("index");
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Statistics.cs b/src/ARMeilleure/Statistics.cs
new file mode 100644
index 00000000..fbc64708
--- /dev/null
+++ b/src/ARMeilleure/Statistics.cs
@@ -0,0 +1,94 @@
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+
+namespace ARMeilleure
+{
+ public static class Statistics
+ {
+ private const int ReportMaxFunctions = 100;
+
+#pragma warning disable CS0169
+ [ThreadStatic]
+ private static Stopwatch _executionTimer;
+#pragma warning restore CS0169
+
+ private static ConcurrentDictionary<ulong, long> _ticksPerFunction;
+
+ static Statistics()
+ {
+ _ticksPerFunction = new ConcurrentDictionary<ulong, long>();
+ }
+
+ public static void InitializeTimer()
+ {
+#if M_PROFILE
+ if (_executionTimer == null)
+ {
+ _executionTimer = new Stopwatch();
+ }
+#endif
+ }
+
+ internal static void StartTimer()
+ {
+#if M_PROFILE
+ _executionTimer.Restart();
+#endif
+ }
+
+ internal static void StopTimer(ulong funcAddr)
+ {
+#if M_PROFILE
+ _executionTimer.Stop();
+
+ long ticks = _executionTimer.ElapsedTicks;
+
+ _ticksPerFunction.AddOrUpdate(funcAddr, ticks, (key, oldTicks) => oldTicks + ticks);
+#endif
+ }
+
+ internal static void ResumeTimer()
+ {
+#if M_PROFILE
+ _executionTimer.Start();
+#endif
+ }
+
+ internal static void PauseTimer()
+ {
+#if M_PROFILE
+ _executionTimer.Stop();
+#endif
+ }
+
+ public static string GetReport()
+ {
+ int count = 0;
+
+ StringBuilder sb = new StringBuilder();
+
+ sb.AppendLine(" Function address | Time");
+ sb.AppendLine("--------------------------");
+
+ KeyValuePair<ulong, long>[] funcTable = _ticksPerFunction.ToArray();
+
+ foreach (KeyValuePair<ulong, long> kv in funcTable.OrderByDescending(x => x.Value))
+ {
+ long timeInMs = (kv.Value * 1000) / Stopwatch.Frequency;
+
+ sb.AppendLine($" 0x{kv.Key:X16} | {timeInMs} ms");
+
+ if (count++ >= ReportMaxFunctions)
+ {
+ break;
+ }
+ }
+
+ return sb.ToString();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/ArmEmitterContext.cs b/src/ARMeilleure/Translation/ArmEmitterContext.cs
new file mode 100644
index 00000000..565d2aad
--- /dev/null
+++ b/src/ARMeilleure/Translation/ArmEmitterContext.cs
@@ -0,0 +1,282 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.Common;
+using ARMeilleure.Decoders;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.Instructions;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System;
+using System.Collections.Generic;
+using System.Reflection;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Translation
+{
+ class ArmEmitterContext : EmitterContext
+ {
+ private readonly Dictionary<ulong, Operand> _labels;
+
+ private OpCode _optOpLastCompare;
+ private OpCode _optOpLastFlagSet;
+
+ private Operand _optCmpTempN;
+ private Operand _optCmpTempM;
+
+ private Block _currBlock;
+
+ public Block CurrBlock
+ {
+ get
+ {
+ return _currBlock;
+ }
+ set
+ {
+ _currBlock = value;
+
+ ResetBlockState();
+ }
+ }
+
+ private bool _pendingQcFlagSync;
+
+ public OpCode CurrOp { get; set; }
+
+ public IMemoryManager Memory { get; }
+
+ public EntryTable<uint> CountTable { get; }
+ public AddressTable<ulong> FunctionTable { get; }
+ public TranslatorStubs Stubs { get; }
+
+ public ulong EntryAddress { get; }
+ public bool HighCq { get; }
+ public bool HasPtc { get; }
+ public Aarch32Mode Mode { get; }
+
+ private int _ifThenBlockStateIndex = 0;
+ private Condition[] _ifThenBlockState = { };
+ public bool IsInIfThenBlock => _ifThenBlockStateIndex < _ifThenBlockState.Length;
+ public Condition CurrentIfThenBlockCond => _ifThenBlockState[_ifThenBlockStateIndex];
+
+ public ArmEmitterContext(
+ IMemoryManager memory,
+ EntryTable<uint> countTable,
+ AddressTable<ulong> funcTable,
+ TranslatorStubs stubs,
+ ulong entryAddress,
+ bool highCq,
+ bool hasPtc,
+ Aarch32Mode mode)
+ {
+ Memory = memory;
+ CountTable = countTable;
+ FunctionTable = funcTable;
+ Stubs = stubs;
+ EntryAddress = entryAddress;
+ HighCq = highCq;
+ HasPtc = hasPtc;
+ Mode = mode;
+
+ _labels = new Dictionary<ulong, Operand>();
+ }
+
+ public override Operand Call(MethodInfo info, params Operand[] callArgs)
+ {
+ SyncQcFlag();
+
+ if (!HasPtc)
+ {
+ return base.Call(info, callArgs);
+ }
+ else
+ {
+ int index = Delegates.GetDelegateIndex(info);
+ IntPtr funcPtr = Delegates.GetDelegateFuncPtrByIndex(index);
+
+ OperandType returnType = GetOperandType(info.ReturnType);
+
+ Symbol symbol = new Symbol(SymbolType.DelegateTable, (ulong)index);
+
+ Symbols.Add((ulong)funcPtr.ToInt64(), info.Name);
+
+ return Call(Const(funcPtr.ToInt64(), symbol), returnType, callArgs);
+ }
+ }
+
+ public Operand GetLabel(ulong address)
+ {
+ if (!_labels.TryGetValue(address, out Operand label))
+ {
+ label = Label();
+
+ _labels.Add(address, label);
+ }
+
+ return label;
+ }
+
+ public void MarkComparison(Operand n, Operand m)
+ {
+ _optOpLastCompare = CurrOp;
+
+ _optCmpTempN = Copy(n);
+ _optCmpTempM = Copy(m);
+ }
+
+ public void MarkFlagSet(PState stateFlag)
+ {
+ // Set this only if any of the NZCV flag bits were modified.
+            // This is used to ensure that when emitting a direct IL branch
+ // instruction for compare + branch sequences, we're not expecting
+ // to use comparison values from an old instruction, when in fact
+ // the flags were already overwritten by another instruction further along.
+ if (stateFlag >= PState.VFlag)
+ {
+ _optOpLastFlagSet = CurrOp;
+ }
+ }
+
+ private void ResetBlockState()
+ {
+ _optOpLastCompare = null;
+ _optOpLastFlagSet = null;
+ }
+
+ public void SetPendingQcFlagSync()
+ {
+ _pendingQcFlagSync = true;
+ }
+
+ public void SyncQcFlag()
+ {
+ if (_pendingQcFlagSync)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ Operand fpsr = AddIntrinsicInt(Intrinsic.Arm64MrsFpsr);
+
+ uint qcFlagMask = (uint)FPSR.Qc;
+
+ Operand qcClearLabel = Label();
+
+ BranchIfFalse(qcClearLabel, BitwiseAnd(fpsr, Const(qcFlagMask)));
+
+ AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0));
+ InstEmitHelper.SetFpFlag(this, FPState.QcFlag, Const(1));
+
+ MarkLabel(qcClearLabel);
+ }
+
+ _pendingQcFlagSync = false;
+ }
+ }
+
+ public void ClearQcFlag()
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0));
+ }
+ }
+
+ public void ClearQcFlagIfModified()
+ {
+ if (_pendingQcFlagSync && Optimizations.UseAdvSimd)
+ {
+ AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0));
+ }
+ }
+
+ public void EnterArmFpMode()
+ {
+ InstEmitSimdHelper.EnterArmFpMode(this, InstEmitHelper.GetFpFlag);
+ }
+
+ public void UpdateArmFpMode()
+ {
+ EnterArmFpMode();
+ }
+
+ public void ExitArmFpMode()
+ {
+ InstEmitSimdHelper.ExitArmFpMode(this, (flag, value) => InstEmitHelper.SetFpFlag(this, flag, value));
+ }
+
+ public Operand TryGetComparisonResult(Condition condition)
+ {
+ if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet)
+ {
+ return default;
+ }
+
+ Operand n = _optCmpTempN;
+ Operand m = _optCmpTempM;
+
+ InstName cmpName = _optOpLastCompare.Instruction.Name;
+
+ if (cmpName == InstName.Subs)
+ {
+ switch (condition)
+ {
+ case Condition.Eq: return ICompareEqual (n, m);
+ case Condition.Ne: return ICompareNotEqual (n, m);
+ case Condition.GeUn: return ICompareGreaterOrEqualUI(n, m);
+ case Condition.LtUn: return ICompareLessUI (n, m);
+ case Condition.GtUn: return ICompareGreaterUI (n, m);
+ case Condition.LeUn: return ICompareLessOrEqualUI (n, m);
+ case Condition.Ge: return ICompareGreaterOrEqual (n, m);
+ case Condition.Lt: return ICompareLess (n, m);
+ case Condition.Gt: return ICompareGreater (n, m);
+ case Condition.Le: return ICompareLessOrEqual (n, m);
+ }
+ }
+ else if (cmpName == InstName.Adds && _optOpLastCompare is IOpCodeAluImm op)
+ {
+            // There are several limitations that need to be taken into account for CMN comparisons:
+            // - The unsigned comparisons are not valid, as they depend on the
+            // carry flag value, which has a different meaning for addition and
+            // subtraction: for addition it's carry, and for subtraction it's borrow.
+            // So, we need to make sure we're not doing an unsigned compare for the CMN case.
+            // - We can only do the optimization for the immediate variants,
+            // because when the second operand value is exactly INT_MIN, we can't
+            // negate the value as there's no positive counterpart.
+            // Such invalid values can't be encoded on the immediate encodings.
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ m = Const((int)-op.Immediate);
+ }
+ else
+ {
+ m = Const(-op.Immediate);
+ }
+
+ switch (condition)
+ {
+ case Condition.Eq: return ICompareEqual (n, m);
+ case Condition.Ne: return ICompareNotEqual (n, m);
+ case Condition.Ge: return ICompareGreaterOrEqual(n, m);
+ case Condition.Lt: return ICompareLess (n, m);
+ case Condition.Gt: return ICompareGreater (n, m);
+ case Condition.Le: return ICompareLessOrEqual (n, m);
+ }
+ }
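+
+            // In other words (illustrative): CMN n, #imm sets flags for n + imm, which for
+            // the signed conditions above is equivalent to CMP n, #-imm, so the comparison
+            // is rewritten against the negated immediate.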
+
+ return default;
+ }
+
+ public void SetIfThenBlockState(Condition[] state)
+ {
+ _ifThenBlockState = state;
+ _ifThenBlockStateIndex = 0;
+ }
+
+ public void AdvanceIfThenBlockState()
+ {
+ if (IsInIfThenBlock)
+ {
+ _ifThenBlockStateIndex++;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/Cache/CacheEntry.cs b/src/ARMeilleure/Translation/Cache/CacheEntry.cs
new file mode 100644
index 00000000..dc5503b1
--- /dev/null
+++ b/src/ARMeilleure/Translation/Cache/CacheEntry.cs
@@ -0,0 +1,26 @@
+using ARMeilleure.CodeGen.Unwinding;
+using System;
+using System.Diagnostics.CodeAnalysis;
+
+namespace ARMeilleure.Translation.Cache
+{
+ readonly struct CacheEntry : IComparable<CacheEntry>
+ {
+ public int Offset { get; }
+ public int Size { get; }
+
+ public UnwindInfo UnwindInfo { get; }
+
+ public CacheEntry(int offset, int size, UnwindInfo unwindInfo)
+ {
+ Offset = offset;
+ Size = size;
+ UnwindInfo = unwindInfo;
+ }
+
+ public int CompareTo([AllowNull] CacheEntry other)
+ {
+ return Offset.CompareTo(other.Offset);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs b/src/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs
new file mode 100644
index 00000000..4c22de40
--- /dev/null
+++ b/src/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs
@@ -0,0 +1,96 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+
+namespace ARMeilleure.Translation.Cache
+{
+ class CacheMemoryAllocator
+ {
+ private readonly struct MemoryBlock : IComparable<MemoryBlock>
+ {
+ public int Offset { get; }
+ public int Size { get; }
+
+ public MemoryBlock(int offset, int size)
+ {
+ Offset = offset;
+ Size = size;
+ }
+
+ public int CompareTo([AllowNull] MemoryBlock other)
+ {
+ return Offset.CompareTo(other.Offset);
+ }
+ }
+
+ private readonly List<MemoryBlock> _blocks = new List<MemoryBlock>();
+
+ public CacheMemoryAllocator(int capacity)
+ {
+ _blocks.Add(new MemoryBlock(0, capacity));
+ }
+
+ public int Allocate(int size)
+ {
+ for (int i = 0; i < _blocks.Count; i++)
+ {
+ MemoryBlock block = _blocks[i];
+
+ if (block.Size > size)
+ {
+ _blocks[i] = new MemoryBlock(block.Offset + size, block.Size - size);
+ return block.Offset;
+ }
+ else if (block.Size == size)
+ {
+ _blocks.RemoveAt(i);
+ return block.Offset;
+ }
+ }
+
+ // We don't have enough free memory to perform the allocation.
+ return -1;
+ }
+
+ public void Free(int offset, int size)
+ {
+ Insert(new MemoryBlock(offset, size));
+ }
+
+ private void Insert(MemoryBlock block)
+ {
+ int index = _blocks.BinarySearch(block);
+
+ if (index < 0)
+ {
+ index = ~index;
+ }
+
+ if (index < _blocks.Count)
+ {
+ MemoryBlock next = _blocks[index];
+
+ int endOffs = block.Offset + block.Size;
+
+ if (next.Offset == endOffs)
+ {
+ block = new MemoryBlock(block.Offset, block.Size + next.Size);
+ _blocks.RemoveAt(index);
+ }
+ }
+
+ if (index > 0)
+ {
+ MemoryBlock prev = _blocks[index - 1];
+
+ if (prev.Offset + prev.Size == block.Offset)
+ {
+ block = new MemoryBlock(block.Offset - prev.Size, block.Size + prev.Size);
+ _blocks.RemoveAt(--index);
+ }
+ }
+
+ _blocks.Insert(index, block);
+ }
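+
+        // Insert keeps the free list sorted by offset and coalesces with both neighbours.
+        // For example (illustrative): with free blocks [0, 16) and [32, 48), freeing
+        // [16, 32) first merges forward into [16, 48), then merges backward into a
+        // single [0, 48) block.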
+ }
+}
diff --git a/src/ARMeilleure/Translation/Cache/JitCache.cs b/src/ARMeilleure/Translation/Cache/JitCache.cs
new file mode 100644
index 00000000..f496a8e9
--- /dev/null
+++ b/src/ARMeilleure/Translation/Cache/JitCache.cs
@@ -0,0 +1,198 @@
+using ARMeilleure.CodeGen;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Memory;
+using ARMeilleure.Native;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation.Cache
+{
+ static class JitCache
+ {
+ private const int PageSize = 4 * 1024;
+ private const int PageMask = PageSize - 1;
+
+ private const int CodeAlignment = 4; // Bytes.
+ private const int CacheSize = 2047 * 1024 * 1024;
+
+ private static ReservedRegion _jitRegion;
+ private static JitCacheInvalidation _jitCacheInvalidator;
+
+ private static CacheMemoryAllocator _cacheAllocator;
+
+ private static readonly List<CacheEntry> _cacheEntries = new List<CacheEntry>();
+
+ private static readonly object _lock = new object();
+ private static bool _initialized;
+
+ public static void Initialize(IJitMemoryAllocator allocator)
+ {
+ if (_initialized) return;
+
+ lock (_lock)
+ {
+ if (_initialized) return;
+
+ _jitRegion = new ReservedRegion(allocator, CacheSize);
+ _jitCacheInvalidator = new JitCacheInvalidation(allocator);
+
+ _cacheAllocator = new CacheMemoryAllocator(CacheSize);
+
+ if (OperatingSystem.IsWindows())
+ {
+ JitUnwindWindows.InstallFunctionTableHandler(_jitRegion.Pointer, CacheSize, _jitRegion.Pointer + Allocate(PageSize));
+ }
+
+ _initialized = true;
+ }
+ }
+
+ public static IntPtr Map(CompiledFunction func)
+ {
+ byte[] code = func.Code;
+
+ lock (_lock)
+ {
+ Debug.Assert(_initialized);
+
+ int funcOffset = Allocate(code.Length);
+
+ IntPtr funcPtr = _jitRegion.Pointer + funcOffset;
+
+ if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+ {
+ unsafe
+ {
+ fixed (byte *codePtr = code)
+ {
+ JitSupportDarwin.Copy(funcPtr, (IntPtr)codePtr, (ulong)code.Length);
+ }
+ }
+ }
+ else
+ {
+ ReprotectAsWritable(funcOffset, code.Length);
+ Marshal.Copy(code, 0, funcPtr, code.Length);
+ ReprotectAsExecutable(funcOffset, code.Length);
+
+ _jitCacheInvalidator.Invalidate(funcPtr, (ulong)code.Length);
+ }
+
+ Add(funcOffset, code.Length, func.UnwindInfo);
+
+ return funcPtr;
+ }
+ }
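+
+        // On Apple Silicon the JIT region is MAP_JIT memory, which cannot simply be
+        // reprotected from writable to executable; JitSupportDarwin.Copy is expected to
+        // toggle the per-thread write protection around the copy and handle instruction
+        // cache maintenance (an assumption about its behaviour, based on the platform
+        // constraints).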
+
+ public static void Unmap(IntPtr pointer)
+ {
+ lock (_lock)
+ {
+ Debug.Assert(_initialized);
+
+ int funcOffset = (int)(pointer.ToInt64() - _jitRegion.Pointer.ToInt64());
+
+ bool result = TryFind(funcOffset, out CacheEntry entry);
+ Debug.Assert(result);
+
+ _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size));
+
+ Remove(funcOffset);
+ }
+ }
+
+ private static void ReprotectAsWritable(int offset, int size)
+ {
+ int endOffs = offset + size;
+
+ int regionStart = offset & ~PageMask;
+ int regionEnd = (endOffs + PageMask) & ~PageMask;
+
+ _jitRegion.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart));
+ }
+
+ private static void ReprotectAsExecutable(int offset, int size)
+ {
+ int endOffs = offset + size;
+
+ int regionStart = offset & ~PageMask;
+ int regionEnd = (endOffs + PageMask) & ~PageMask;
+
+ _jitRegion.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart));
+ }
+
+ private static int Allocate(int codeSize)
+ {
+ codeSize = AlignCodeSize(codeSize);
+
+ int allocOffset = _cacheAllocator.Allocate(codeSize);
+
+ if (allocOffset < 0)
+ {
+ throw new OutOfMemoryException("JIT Cache exhausted.");
+ }
+
+ _jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize);
+
+ return allocOffset;
+ }
+
+ private static int AlignCodeSize(int codeSize)
+ {
+ return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);
+ }
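+
+        // This is the usual round-up-to-alignment idiom: with CodeAlignment = 4, a size
+        // of 5 becomes (5 + 3) & ~3 = 8. The checked() guards the addition against
+        // overflow for pathologically large sizes.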
+
+ private static void Add(int offset, int size, UnwindInfo unwindInfo)
+ {
+ CacheEntry entry = new CacheEntry(offset, size, unwindInfo);
+
+ int index = _cacheEntries.BinarySearch(entry);
+
+ if (index < 0)
+ {
+ index = ~index;
+ }
+
+ _cacheEntries.Insert(index, entry);
+ }
+
+ private static void Remove(int offset)
+ {
+ int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default));
+
+ if (index < 0)
+ {
+ index = ~index - 1;
+ }
+
+ if (index >= 0)
+ {
+ _cacheEntries.RemoveAt(index);
+ }
+ }
+
+ public static bool TryFind(int offset, out CacheEntry entry)
+ {
+ lock (_lock)
+ {
+ int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default));
+
+ if (index < 0)
+ {
+ index = ~index - 1;
+ }
+
+ if (index >= 0)
+ {
+ entry = _cacheEntries[index];
+ return true;
+ }
+ }
+
+ entry = default;
+ return false;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs b/src/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs
new file mode 100644
index 00000000..ec2ae73b
--- /dev/null
+++ b/src/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs
@@ -0,0 +1,79 @@
+using ARMeilleure.Memory;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation.Cache
+{
+ class JitCacheInvalidation
+ {
+ private static int[] _invalidationCode = new int[]
+ {
+ unchecked((int)0xd53b0022), // mrs x2, ctr_el0
+ unchecked((int)0xd3504c44), // ubfx x4, x2, #16, #4
+ unchecked((int)0x52800083), // mov w3, #0x4
+ unchecked((int)0x12000c45), // and w5, w2, #0xf
+ unchecked((int)0x1ac42064), // lsl w4, w3, w4
+ unchecked((int)0x51000482), // sub w2, w4, #0x1
+ unchecked((int)0x8a220002), // bic x2, x0, x2
+ unchecked((int)0x1ac52063), // lsl w3, w3, w5
+ unchecked((int)0xeb01005f), // cmp x2, x1
+ unchecked((int)0x93407c84), // sxtw x4, w4
+ unchecked((int)0x540000a2), // b.cs 3c <do_ic_clear>
+ unchecked((int)0xd50b7b22), // dc cvau, x2
+ unchecked((int)0x8b040042), // add x2, x2, x4
+ unchecked((int)0xeb02003f), // cmp x1, x2
+ unchecked((int)0x54ffffa8), // b.hi 2c <dc_clear_loop>
+ unchecked((int)0xd5033b9f), // dsb ish
+ unchecked((int)0x51000462), // sub w2, w3, #0x1
+ unchecked((int)0x93407c63), // sxtw x3, w3
+ unchecked((int)0x8a220000), // bic x0, x0, x2
+ unchecked((int)0xeb00003f), // cmp x1, x0
+ unchecked((int)0x540000a9), // b.ls 64 <exit>
+ unchecked((int)0xd50b7520), // ic ivau, x0
+ unchecked((int)0x8b030000), // add x0, x0, x3
+ unchecked((int)0xeb00003f), // cmp x1, x0
+ unchecked((int)0x54ffffa8), // b.hi 54 <ic_clear_loop>
+ unchecked((int)0xd5033b9f), // dsb ish
+ unchecked((int)0xd5033fdf), // isb
+ unchecked((int)0xd65f03c0), // ret
+ };
+
+ private delegate void InvalidateCache(ulong start, ulong end);
+
+ private InvalidateCache _invalidateCache;
+ private ReservedRegion _invalidateCacheCodeRegion;
+
+ private readonly bool _needsInvalidation;
+
+ public JitCacheInvalidation(IJitMemoryAllocator allocator)
+ {
+ // On macOS, a different path is used to write to the JIT cache, which does the invalidation.
+ if (!OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+ {
+ ulong size = (ulong)_invalidationCode.Length * sizeof(int);
+ ulong mask = (ulong)ReservedRegion.DefaultGranularity - 1;
+
+ size = (size + mask) & ~mask;
+
+ _invalidateCacheCodeRegion = new ReservedRegion(allocator, size);
+ _invalidateCacheCodeRegion.ExpandIfNeeded(size);
+
+ Marshal.Copy(_invalidationCode, 0, _invalidateCacheCodeRegion.Pointer, _invalidationCode.Length);
+
+ _invalidateCacheCodeRegion.Block.MapAsRx(0, size);
+
+ _invalidateCache = Marshal.GetDelegateForFunctionPointer<InvalidateCache>(_invalidateCacheCodeRegion.Pointer);
+
+ _needsInvalidation = true;
+ }
+ }
+
+ public void Invalidate(IntPtr basePointer, ulong size)
+ {
+ if (_needsInvalidation)
+ {
+ _invalidateCache((ulong)basePointer, (ulong)basePointer + size);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/Cache/JitUnwindWindows.cs b/src/ARMeilleure/Translation/Cache/JitUnwindWindows.cs
new file mode 100644
index 00000000..77727bf1
--- /dev/null
+++ b/src/ARMeilleure/Translation/Cache/JitUnwindWindows.cs
@@ -0,0 +1,189 @@
+// https://github.com/MicrosoftDocs/cpp-docs/blob/master/docs/build/exception-handling-x64.md
+
+using ARMeilleure.CodeGen.Unwinding;
+using System;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation.Cache
+{
+ static partial class JitUnwindWindows
+ {
+ private const int MaxUnwindCodesArraySize = 32; // Must be an even value.
+
+ private struct RuntimeFunction
+ {
+ public uint BeginAddress;
+ public uint EndAddress;
+ public uint UnwindData;
+ }
+
+ private struct UnwindInfo
+ {
+ public byte VersionAndFlags;
+ public byte SizeOfProlog;
+ public byte CountOfUnwindCodes;
+ public byte FrameRegister;
+ public unsafe fixed ushort UnwindCodes[MaxUnwindCodesArraySize];
+ }
+
+ private enum UnwindOp
+ {
+ PushNonvol = 0,
+ AllocLarge = 1,
+ AllocSmall = 2,
+ SetFpreg = 3,
+ SaveNonvol = 4,
+ SaveNonvolFar = 5,
+ SaveXmm128 = 8,
+ SaveXmm128Far = 9,
+ PushMachframe = 10
+ }
+
+ private unsafe delegate RuntimeFunction* GetRuntimeFunctionCallback(ulong controlPc, IntPtr context);
+
+ [LibraryImport("kernel32.dll")]
+ [return: MarshalAs(UnmanagedType.Bool)]
+ private static unsafe partial bool RtlInstallFunctionTableCallback(
+ ulong tableIdentifier,
+ ulong baseAddress,
+ uint length,
+ GetRuntimeFunctionCallback callback,
+ IntPtr context,
+ [MarshalAs(UnmanagedType.LPWStr)] string outOfProcessCallbackDll);
+
+ private static GetRuntimeFunctionCallback _getRuntimeFunctionCallback;
+
+ private static int _sizeOfRuntimeFunction;
+
+ private unsafe static RuntimeFunction* _runtimeFunction;
+
+ private unsafe static UnwindInfo* _unwindInfo;
+
+ public static void InstallFunctionTableHandler(IntPtr codeCachePointer, uint codeCacheLength, IntPtr workBufferPtr)
+ {
+ ulong codeCachePtr = (ulong)codeCachePointer.ToInt64();
+
+ _sizeOfRuntimeFunction = Marshal.SizeOf<RuntimeFunction>();
+
+ bool result;
+
+ unsafe
+ {
+ _runtimeFunction = (RuntimeFunction*)workBufferPtr;
+
+ _unwindInfo = (UnwindInfo*)(workBufferPtr + _sizeOfRuntimeFunction);
+
+ _getRuntimeFunctionCallback = new GetRuntimeFunctionCallback(FunctionTableHandler);
+
+ result = RtlInstallFunctionTableCallback(
+ codeCachePtr | 3,
+ codeCachePtr,
+ codeCacheLength,
+ _getRuntimeFunctionCallback,
+ codeCachePointer,
+ null);
+ }
+
+ if (!result)
+ {
+ throw new InvalidOperationException("Failure installing function table callback.");
+ }
+ }
+
+ private static unsafe RuntimeFunction* FunctionTableHandler(ulong controlPc, IntPtr context)
+ {
+ int offset = (int)((long)controlPc - context.ToInt64());
+
+ if (!JitCache.TryFind(offset, out CacheEntry funcEntry))
+ {
+ return null; // Not found.
+ }
+
+ var unwindInfo = funcEntry.UnwindInfo;
+
+ int codeIndex = 0;
+
+ for (int index = unwindInfo.PushEntries.Length - 1; index >= 0; index--)
+ {
+ var entry = unwindInfo.PushEntries[index];
+
+ switch (entry.PseudoOp)
+ {
+ case UnwindPseudoOp.SaveXmm128:
+ {
+ int stackOffset = entry.StackOffsetOrAllocSize;
+
+ Debug.Assert(stackOffset % 16 == 0);
+
+ if (stackOffset <= 0xFFFF0)
+ {
+ _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.SaveXmm128, entry.PrologOffset, entry.RegIndex);
+ _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset / 16);
+ }
+ else
+ {
+ _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.SaveXmm128Far, entry.PrologOffset, entry.RegIndex);
+ _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset >> 0);
+ _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset >> 16);
+ }
+
+ break;
+ }
+
+ case UnwindPseudoOp.AllocStack:
+ {
+ int allocSize = entry.StackOffsetOrAllocSize;
+
+ Debug.Assert(allocSize % 8 == 0);
+
+ if (allocSize <= 128)
+ {
+ _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocSmall, entry.PrologOffset, (allocSize / 8) - 1);
+ }
+ else if (allocSize <= 0x7FFF8)
+ {
+ _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocLarge, entry.PrologOffset, 0);
+ _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize / 8);
+ }
+ else
+ {
+ _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocLarge, entry.PrologOffset, 1);
+ _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize >> 0);
+ _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize >> 16);
+ }
+
+ break;
+ }
+
+ case UnwindPseudoOp.PushReg:
+ {
+ _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.PushNonvol, entry.PrologOffset, entry.RegIndex);
+
+ break;
+ }
+
+ default: throw new NotImplementedException($"({nameof(entry.PseudoOp)} = {entry.PseudoOp})");
+ }
+ }
+
+ Debug.Assert(codeIndex <= MaxUnwindCodesArraySize);
+
+ _unwindInfo->VersionAndFlags = 1; // Flags: The function has no handler.
+ _unwindInfo->SizeOfProlog = (byte)unwindInfo.PrologSize;
+ _unwindInfo->CountOfUnwindCodes = (byte)codeIndex;
+ _unwindInfo->FrameRegister = 0;
+
+ _runtimeFunction->BeginAddress = (uint)funcEntry.Offset;
+ _runtimeFunction->EndAddress = (uint)(funcEntry.Offset + funcEntry.Size);
+ _runtimeFunction->UnwindData = (uint)_sizeOfRuntimeFunction;
+
+ return _runtimeFunction;
+ }
+
+ private static ushort PackUnwindOp(UnwindOp op, int prologOffset, int opInfo)
+ {
+ return (ushort)(prologOffset | ((int)op << 8) | (opInfo << 12));
+ }
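+
+        // The packed value matches the Windows x64 UNWIND_CODE layout: bits 0-7 hold the
+        // offset in the prolog, bits 8-11 the unwind operation code, and bits 12-15 the
+        // operation info. E.g. PackUnwindOp(UnwindOp.PushNonvol, 4, 5) yields 0x5004
+        // (a push of register 5 recorded at prolog offset 4).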
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/Compiler.cs b/src/ARMeilleure/Translation/Compiler.cs
new file mode 100644
index 00000000..d4aa5cd9
--- /dev/null
+++ b/src/ARMeilleure/Translation/Compiler.cs
@@ -0,0 +1,68 @@
+using ARMeilleure.CodeGen;
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation
+{
+ static class Compiler
+ {
+ public static CompiledFunction Compile(
+ ControlFlowGraph cfg,
+ OperandType[] argTypes,
+ OperandType retType,
+ CompilerOptions options,
+ Architecture target)
+ {
+ CompilerContext cctx = new(cfg, argTypes, retType, options);
+
+ if (options.HasFlag(CompilerOptions.Optimize))
+ {
+ Logger.StartPass(PassName.TailMerge);
+
+ TailMerge.RunPass(cctx);
+
+ Logger.EndPass(PassName.TailMerge, cfg);
+ }
+
+ if (options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Logger.StartPass(PassName.Dominance);
+
+ Dominance.FindDominators(cfg);
+ Dominance.FindDominanceFrontiers(cfg);
+
+ Logger.EndPass(PassName.Dominance);
+
+ Logger.StartPass(PassName.SsaConstruction);
+
+ Ssa.Construct(cfg);
+
+ Logger.EndPass(PassName.SsaConstruction, cfg);
+ }
+ else
+ {
+ Logger.StartPass(PassName.RegisterToLocal);
+
+ RegisterToLocal.Rename(cfg);
+
+ Logger.EndPass(PassName.RegisterToLocal, cfg);
+ }
+
+ if (target == Architecture.X64)
+ {
+ return CodeGen.X86.CodeGenerator.Generate(cctx);
+ }
+ else if (target == Architecture.Arm64)
+ {
+ return CodeGen.Arm64.CodeGenerator.Generate(cctx);
+ }
+ else
+ {
+ throw new NotImplementedException(target.ToString());
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/CompilerContext.cs b/src/ARMeilleure/Translation/CompilerContext.cs
new file mode 100644
index 00000000..510dec58
--- /dev/null
+++ b/src/ARMeilleure/Translation/CompilerContext.cs
@@ -0,0 +1,26 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.Translation
+{
+ readonly struct CompilerContext
+ {
+ public ControlFlowGraph Cfg { get; }
+
+ public OperandType[] FuncArgTypes { get; }
+ public OperandType FuncReturnType { get; }
+
+ public CompilerOptions Options { get; }
+
+ public CompilerContext(
+ ControlFlowGraph cfg,
+ OperandType[] funcArgTypes,
+ OperandType funcReturnType,
+ CompilerOptions options)
+ {
+ Cfg = cfg;
+ FuncArgTypes = funcArgTypes;
+ FuncReturnType = funcReturnType;
+ Options = options;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/CompilerOptions.cs b/src/ARMeilleure/Translation/CompilerOptions.cs
new file mode 100644
index 00000000..0a07ed4a
--- /dev/null
+++ b/src/ARMeilleure/Translation/CompilerOptions.cs
@@ -0,0 +1,17 @@
+using System;
+
+namespace ARMeilleure.Translation
+{
+ [Flags]
+ enum CompilerOptions
+ {
+ None = 0,
+ SsaForm = 1 << 0,
+ Optimize = 1 << 1,
+ Lsra = 1 << 2,
+ Relocatable = 1 << 3,
+
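+ // Precomposed tiers: MediumCq relies on the faster hybrid register allocator,
+ // while HighCq additionally enables linear scan register allocation (Lsra).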
+ MediumCq = SsaForm | Optimize,
+ HighCq = SsaForm | Optimize | Lsra
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/ControlFlowGraph.cs b/src/ARMeilleure/Translation/ControlFlowGraph.cs
new file mode 100644
index 00000000..c935f152
--- /dev/null
+++ b/src/ARMeilleure/Translation/ControlFlowGraph.cs
@@ -0,0 +1,155 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace ARMeilleure.Translation
+{
+ class ControlFlowGraph
+ {
+ private BasicBlock[] _postOrderBlocks;
+ private int[] _postOrderMap;
+
+ public int LocalsCount { get; private set; }
+ public BasicBlock Entry { get; }
+ public IntrusiveList<BasicBlock> Blocks { get; }
+ public BasicBlock[] PostOrderBlocks => _postOrderBlocks;
+ public int[] PostOrderMap => _postOrderMap;
+
+ public ControlFlowGraph(BasicBlock entry, IntrusiveList<BasicBlock> blocks, int localsCount)
+ {
+ Entry = entry;
+ Blocks = blocks;
+ LocalsCount = localsCount;
+
+ Update();
+ }
+
+ public Operand AllocateLocal(OperandType type)
+ {
+ Operand result = Operand.Factory.Local(type);
+
+ result.NumberLocal(++LocalsCount);
+
+ return result;
+ }
+
+ public void Update()
+ {
+ RemoveUnreachableBlocks(Blocks);
+
+ var visited = new HashSet<BasicBlock>();
+ var blockStack = new Stack<BasicBlock>();
+
+ Array.Resize(ref _postOrderBlocks, Blocks.Count);
+ Array.Resize(ref _postOrderMap, Blocks.Count);
+
+ visited.Add(Entry);
+ blockStack.Push(Entry);
+
+ int index = 0;
+
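+ // Iterative post-order DFS: a block is re-pushed before its first unvisited
+ // successor; once every successor has been visited, it receives the next
+ // post-order number.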
+ while (blockStack.TryPop(out BasicBlock block))
+ {
+ bool visitedNew = false;
+
+ for (int i = 0; i < block.SuccessorsCount; i++)
+ {
+ BasicBlock succ = block.GetSuccessor(i);
+
+ if (visited.Add(succ))
+ {
+ blockStack.Push(block);
+ blockStack.Push(succ);
+
+ visitedNew = true;
+
+ break;
+ }
+ }
+
+ if (!visitedNew)
+ {
+ PostOrderMap[block.Index] = index;
+
+ PostOrderBlocks[index++] = block;
+ }
+ }
+ }
+
+ private void RemoveUnreachableBlocks(IntrusiveList<BasicBlock> blocks)
+ {
+ var visited = new HashSet<BasicBlock>();
+ var workQueue = new Queue<BasicBlock>();
+
+ visited.Add(Entry);
+ workQueue.Enqueue(Entry);
+
+ while (workQueue.TryDequeue(out BasicBlock block))
+ {
+ Debug.Assert(block.Index != -1, "Invalid block index.");
+
+ for (int i = 0; i < block.SuccessorsCount; i++)
+ {
+ BasicBlock succ = block.GetSuccessor(i);
+
+ if (visited.Add(succ))
+ {
+ workQueue.Enqueue(succ);
+ }
+ }
+ }
+
+ if (visited.Count < blocks.Count)
+ {
+ // Remove unreachable blocks and renumber.
+ int index = 0;
+
+ for (BasicBlock block = blocks.First; block != null;)
+ {
+ BasicBlock nextBlock = block.ListNext;
+
+ if (!visited.Contains(block))
+ {
+ while (block.SuccessorsCount > 0)
+ {
+ block.RemoveSuccessor(index: block.SuccessorsCount - 1);
+ }
+
+ blocks.Remove(block);
+ }
+ else
+ {
+ block.Index = index++;
+ }
+
+ block = nextBlock;
+ }
+ }
+ }
+
+ public BasicBlock SplitEdge(BasicBlock predecessor, BasicBlock successor)
+ {
+ BasicBlock splitBlock = new BasicBlock(Blocks.Count);
+
+ for (int i = 0; i < predecessor.SuccessorsCount; i++)
+ {
+ if (predecessor.GetSuccessor(i) == successor)
+ {
+ predecessor.SetSuccessor(i, splitBlock);
+ }
+ }
+
+ if (splitBlock.Predecessors.Count == 0)
+ {
+ throw new ArgumentException("Predecessor and successor are not connected.");
+ }
+
+ splitBlock.AddSuccessor(successor);
+
+ Blocks.AddBefore(successor, splitBlock);
+
+ return splitBlock;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/DelegateHelper.cs b/src/ARMeilleure/Translation/DelegateHelper.cs
new file mode 100644
index 00000000..43a39bab
--- /dev/null
+++ b/src/ARMeilleure/Translation/DelegateHelper.cs
@@ -0,0 +1,104 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Reflection;
+using System.Reflection.Emit;
+
+namespace ARMeilleure.Translation
+{
+ static class DelegateHelper
+ {
+ private const string DelegateTypesAssemblyName = "JitDelegateTypes";
+
+ private static readonly ModuleBuilder _modBuilder;
+
+ private static readonly Dictionary<string, Type> _delegateTypesCache;
+
+ static DelegateHelper()
+ {
+ AssemblyBuilder asmBuilder = AssemblyBuilder.DefineDynamicAssembly(new AssemblyName(DelegateTypesAssemblyName), AssemblyBuilderAccess.Run);
+
+ _modBuilder = asmBuilder.DefineDynamicModule(DelegateTypesAssemblyName);
+
+ _delegateTypesCache = new Dictionary<string, Type>();
+ }
+
+ public static Delegate GetDelegate(MethodInfo info)
+ {
+ ArgumentNullException.ThrowIfNull(info);
+
+ Type[] parameters = info.GetParameters().Select(pI => pI.ParameterType).ToArray();
+ Type returnType = info.ReturnType;
+
+ Type delegateType = GetDelegateType(parameters, returnType);
+
+ return Delegate.CreateDelegate(delegateType, info);
+ }
+
+ private static Type GetDelegateType(Type[] parameters, Type returnType)
+ {
+ string key = GetFunctionSignatureKey(parameters, returnType);
+
+ if (!_delegateTypesCache.TryGetValue(key, out Type delegateType))
+ {
+ delegateType = MakeDelegateType(parameters, returnType, key);
+
+ _delegateTypesCache.TryAdd(key, delegateType);
+ }
+
+ return delegateType;
+ }
+
+ private static string GetFunctionSignatureKey(Type[] parameters, Type returnType)
+ {
+ string sig = GetTypeName(returnType);
+
+ foreach (Type type in parameters)
+ {
+ sig += '_' + GetTypeName(type);
+ }
+
+ return sig;
+ }
+
+ private static string GetTypeName(Type type)
+ {
+ return type.FullName.Replace(".", string.Empty);
+ }
+
+ private const MethodAttributes CtorAttributes =
+ MethodAttributes.RTSpecialName |
+ MethodAttributes.HideBySig |
+ MethodAttributes.Public;
+
+ private const TypeAttributes DelegateTypeAttributes =
+ TypeAttributes.Class |
+ TypeAttributes.Public |
+ TypeAttributes.Sealed |
+ TypeAttributes.AnsiClass |
+ TypeAttributes.AutoClass;
+
+ private const MethodImplAttributes ImplAttributes =
+ MethodImplAttributes.Runtime |
+ MethodImplAttributes.Managed;
+
+ private const MethodAttributes InvokeAttributes =
+ MethodAttributes.Public |
+ MethodAttributes.HideBySig |
+ MethodAttributes.NewSlot |
+ MethodAttributes.Virtual;
+
+ private static readonly Type[] _delegateCtorSignature = { typeof(object), typeof(IntPtr) };
+
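+ // Synthesizes a delegate type at runtime: a sealed MulticastDelegate subclass
+ // whose constructor and Invoke are marked Runtime | Managed, so the CLR
+ // supplies their implementations.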
+ private static Type MakeDelegateType(Type[] parameters, Type returnType, string name)
+ {
+ TypeBuilder builder = _modBuilder.DefineType(name, DelegateTypeAttributes, typeof(MulticastDelegate));
+
+ builder.DefineConstructor(CtorAttributes, CallingConventions.Standard, _delegateCtorSignature).SetImplementationFlags(ImplAttributes);
+
+ builder.DefineMethod("Invoke", InvokeAttributes, returnType, parameters).SetImplementationFlags(ImplAttributes);
+
+ return builder.CreateTypeInfo();
+ }
+ }
+}
diff --git a/src/ARMeilleure/Translation/DelegateInfo.cs b/src/ARMeilleure/Translation/DelegateInfo.cs
new file mode 100644
index 00000000..36320ac3
--- /dev/null
+++ b/src/ARMeilleure/Translation/DelegateInfo.cs
@@ -0,0 +1,19 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation
+{
+ class DelegateInfo
+ {
+ private readonly Delegate _dlg; // Ensure that this delegate will not be garbage collected.
+
+ public IntPtr FuncPtr { get; }
+
+ public DelegateInfo(Delegate dlg)
+ {
+ _dlg = dlg;
+
+ FuncPtr = Marshal.GetFunctionPointerForDelegate<Delegate>(dlg);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Translation/Delegates.cs b/src/ARMeilleure/Translation/Delegates.cs
new file mode 100644
index 00000000..55f1e514
--- /dev/null
+++ b/src/ARMeilleure/Translation/Delegates.cs
@@ -0,0 +1,261 @@
+using ARMeilleure.Instructions;
+using System;
+using System.Collections.Generic;
+using System.Reflection;
+
+namespace ARMeilleure.Translation
+{
+ static class Delegates
+ {
+ public static bool TryGetDelegateFuncPtrByIndex(int index, out IntPtr funcPtr)
+ {
+ if (index >= 0 && index < _delegates.Count)
+ {
+ funcPtr = _delegates.Values[index].FuncPtr; // O(1).
+
+ return true;
+ }
+ else
+ {
+ funcPtr = default;
+
+ return false;
+ }
+ }
+
+ public static IntPtr GetDelegateFuncPtrByIndex(int index)
+ {
+ if (index < 0 || index >= _delegates.Count)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index), $"({nameof(index)} = {index})");
+ }
+
+ return _delegates.Values[index].FuncPtr; // O(1).
+ }
+
+ public static IntPtr GetDelegateFuncPtr(MethodInfo info)
+ {
+ ArgumentNullException.ThrowIfNull(info);
+
+ string key = GetKey(info);
+
+ if (!_delegates.TryGetValue(key, out DelegateInfo dlgInfo)) // O(log(n)).
+ {
+ throw new KeyNotFoundException($"({nameof(key)} = {key})");
+ }
+
+ return dlgInfo.FuncPtr;
+ }
+
+ public static int GetDelegateIndex(MethodInfo info)
+ {
+ ArgumentNullException.ThrowIfNull(info);
+
+ string key = GetKey(info);
+
+ int index = _delegates.IndexOfKey(key); // O(log(n)).
+
+ if (index == -1)
+ {
+ throw new KeyNotFoundException($"({nameof(key)} = {key})");
+ }
+
+ return index;
+ }
+
+ private static void SetDelegateInfo(MethodInfo info)
+ {
+ string key = GetKey(info);
+
+ Delegate dlg = DelegateHelper.GetDelegate(info);
+
+ _delegates.Add(key, new DelegateInfo(dlg)); // Throws ArgumentException if the key is already present.
+ }
+
+ private static string GetKey(MethodInfo info)
+ {
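+ // Keys look like "SoftFloat32.FPAdd": declaring type name, dot, method name.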
+ return $"{info.DeclaringType.Name}.{info.Name}";
+ }
+
+ private static readonly SortedList<string, DelegateInfo> _delegates;
+
+ static Delegates()
+ {
+ _delegates = new SortedList<string, DelegateInfo>();
+
+ SetDelegateInfo(typeof(Math).GetMethod(nameof(Math.Abs), new Type[] { typeof(double) }));
+ SetDelegateInfo(typeof(Math).GetMethod(nameof(Math.Ceiling), new Type[] { typeof(double) }));
+ SetDelegateInfo(typeof(Math).GetMethod(nameof(Math.Floor), new Type[] { typeof(double) }));
+ SetDelegateInfo(typeof(Math).GetMethod(nameof(Math.Round), new Type[] { typeof(double), typeof(MidpointRounding) }));
+ SetDelegateInfo(typeof(Math).GetMethod(nameof(Math.Truncate), new Type[] { typeof(double) }));
+
+ SetDelegateInfo(typeof(MathF).GetMethod(nameof(MathF.Abs), new Type[] { typeof(float) }));
+ SetDelegateInfo(typeof(MathF).GetMethod(nameof(MathF.Ceiling), new Type[] { typeof(float) }));
+ SetDelegateInfo(typeof(MathF).GetMethod(nameof(MathF.Floor), new Type[] { typeof(float) }));
+ SetDelegateInfo(typeof(MathF).GetMethod(nameof(MathF.Round), new Type[] { typeof(float), typeof(MidpointRounding) }));
+ SetDelegateInfo(typeof(MathF).GetMethod(nameof(MathF.Truncate), new Type[] { typeof(float) }));
+
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.Break)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.CheckSynchronization)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.EnqueueForRejit)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntfrqEl0)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntvctEl0)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCtrEl0)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetDczidEl0)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.InvalidateCacheLine)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadVector128)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SignalMemoryTracking)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SupervisorCall)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.Undefined)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64)));
+ SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteVector128)));
+
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32b)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32cb)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32ch)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32cw)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32cx)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32h)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32w)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32x)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Decrypt)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Encrypt)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.FixedRotate)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashChoose)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashLower)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashMajority)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashParity)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashUpper)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.InverseMixColumns)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.MixColumns)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS64)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU64)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart1)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart2)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShrImm64)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl1)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl2)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl3)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl4)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx1)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx2)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx3)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx4)));
+ SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShrImm64)));
+
+ SetDelegateInfo(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)));
+ SetDelegateInfo(typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert)));
+
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPAdd)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPAddFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompare)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareEQ)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareEQFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareGE)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareGEFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareGT)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareGTFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareLE)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareLEFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareLT)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareLTFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPDiv)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMax)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMaxFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMaxNum)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMaxNumFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMin)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMinFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMinNum)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMinNumFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMul)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMulFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMulAdd)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMulAddFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMulSub)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMulSubFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMulX)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPNegMulAdd)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPNegMulSub)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRecipEstimate)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRecipEstimateFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRecipStep))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRecipStepFused)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRecpX)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRSqrtEstimate)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRSqrtEstimateFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRSqrtStep))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRSqrtStepFused)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPSqrt)));
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPSub)));
+
+ SetDelegateInfo(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)));
+
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPAdd)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPAddFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompare)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareEQ)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareEQFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareGE)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareGEFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareGT)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareGTFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareLE)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareLEFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareLT)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareLTFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPDiv)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMax)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMaxFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMaxNum)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMaxNumFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMin)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMinFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMinNum)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMinNumFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMul)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMulFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMulAdd)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMulAddFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMulSub)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMulSubFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMulX)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPNegMulAdd)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPNegMulSub)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRecipEstimate)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRecipEstimateFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRecipStep))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRecipStepFused)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRecpX)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRSqrtEstimate)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRSqrtEstimateFpscr))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRSqrtStep))); // A32 only.
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRSqrtStepFused)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPSqrt)));
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPSub)));
+
+ SetDelegateInfo(typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert)));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Translation/DispatcherFunction.cs b/src/ARMeilleure/Translation/DispatcherFunction.cs
new file mode 100644
index 00000000..7d5a3388
--- /dev/null
+++ b/src/ARMeilleure/Translation/DispatcherFunction.cs
@@ -0,0 +1,7 @@
+using System;
+
+namespace ARMeilleure.Translation
+{
+ delegate void DispatcherFunction(IntPtr nativeContext, ulong startAddress);
+ delegate ulong WrapperFunction(IntPtr nativeContext, ulong startAddress);
+}
diff --git a/src/ARMeilleure/Translation/Dominance.cs b/src/ARMeilleure/Translation/Dominance.cs
new file mode 100644
index 00000000..b9b961d1
--- /dev/null
+++ b/src/ARMeilleure/Translation/Dominance.cs
@@ -0,0 +1,95 @@
+using ARMeilleure.IntermediateRepresentation;
+using System.Diagnostics;
+
+namespace ARMeilleure.Translation
+{
+ static class Dominance
+ {
+ // These methods implement the algorithms from "A Simple, Fast Dominance Algorithm"
+ // by Cooper, Harvey and Kennedy:
+ // https://www.cs.rice.edu/~keith/EMBED/dom.pdf
+ public static void FindDominators(ControlFlowGraph cfg)
+ {
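+ // Intersect walks two blocks up the partially-built dominator tree until they
+ // meet, always advancing whichever block has the smaller post-order number;
+ // the entry block carries the highest number, as asserted below.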
+ BasicBlock Intersect(BasicBlock block1, BasicBlock block2)
+ {
+ while (block1 != block2)
+ {
+ while (cfg.PostOrderMap[block1.Index] < cfg.PostOrderMap[block2.Index])
+ {
+ block1 = block1.ImmediateDominator;
+ }
+
+ while (cfg.PostOrderMap[block2.Index] < cfg.PostOrderMap[block1.Index])
+ {
+ block2 = block2.ImmediateDominator;
+ }
+ }
+
+ return block1;
+ }
+
+ cfg.Entry.ImmediateDominator = cfg.Entry;
+
+ Debug.Assert(cfg.Entry == cfg.PostOrderBlocks[cfg.PostOrderBlocks.Length - 1]);
+
+ bool modified;
+
+ do
+ {
+ modified = false;
+
+ for (int blkIndex = cfg.PostOrderBlocks.Length - 2; blkIndex >= 0; blkIndex--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[blkIndex];
+
+ BasicBlock newIDom = null;
+
+ foreach (BasicBlock predecessor in block.Predecessors)
+ {
+ if (predecessor.ImmediateDominator != null)
+ {
+ if (newIDom != null)
+ {
+ newIDom = Intersect(predecessor, newIDom);
+ }
+ else
+ {
+ newIDom = predecessor;
+ }
+ }
+ }
+
+ if (block.ImmediateDominator != newIDom)
+ {
+ block.ImmediateDominator = newIDom;
+
+ modified = true;
+ }
+ }
+ }
+ while (modified);
+ }
+
+ public static void FindDominanceFrontiers(ControlFlowGraph cfg)
+ {
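+ // A block B belongs to the dominance frontier of N when N dominates a
+ // predecessor of B but not B itself; only join points (two or more
+ // predecessors) can appear in a frontier, so other blocks are skipped.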
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ if (block.Predecessors.Count < 2)
+ {
+ continue;
+ }
+
+ for (int pBlkIndex = 0; pBlkIndex < block.Predecessors.Count; pBlkIndex++)
+ {
+ BasicBlock current = block.Predecessors[pBlkIndex];
+
+ while (current != block.ImmediateDominator)
+ {
+ current.DominanceFrontiers.Add(block);
+
+ current = current.ImmediateDominator;
+ }
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/EmitterContext.cs b/src/ARMeilleure/Translation/EmitterContext.cs
new file mode 100644
index 00000000..8fcb4dee
--- /dev/null
+++ b/src/ARMeilleure/Translation/EmitterContext.cs
@@ -0,0 +1,680 @@
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using System;
+using System.Collections.Generic;
+using System.Reflection;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Translation
+{
+ class EmitterContext
+ {
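+ // Illustrative usage sketch (Label() comes from the statically imported
+ // Operand.Factory):
+ //
+ // Operand lbl = Label();
+ // context.BranchIfFalse(lbl, condition); // skip ahead when condition == 0
+ // ... emit the code guarded by condition ...
+ // context.MarkLabel(lbl);
+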
+ private int _localsCount;
+
+ private readonly Dictionary<Operand, BasicBlock> _irLabels;
+ private readonly IntrusiveList<BasicBlock> _irBlocks;
+
+ private BasicBlock _irBlock;
+ private BasicBlock _ifBlock;
+
+ private bool _needsNewBlock;
+ private BasicBlockFrequency _nextBlockFreq;
+
+ public EmitterContext()
+ {
+ _localsCount = 0;
+
+ _irLabels = new Dictionary<Operand, BasicBlock>();
+ _irBlocks = new IntrusiveList<BasicBlock>();
+
+ _needsNewBlock = true;
+ _nextBlockFreq = BasicBlockFrequency.Default;
+ }
+
+ public Operand AllocateLocal(OperandType type)
+ {
+ Operand local = Local(type);
+
+ local.NumberLocal(++_localsCount);
+
+ return local;
+ }
+
+ public Operand Add(Operand op1, Operand op2)
+ {
+ return Add(Instruction.Add, Local(op1.Type), op1, op2);
+ }
+
+ public Operand BitwiseAnd(Operand op1, Operand op2)
+ {
+ return Add(Instruction.BitwiseAnd, Local(op1.Type), op1, op2);
+ }
+
+ public Operand BitwiseExclusiveOr(Operand op1, Operand op2)
+ {
+ return Add(Instruction.BitwiseExclusiveOr, Local(op1.Type), op1, op2);
+ }
+
+ public Operand BitwiseNot(Operand op1)
+ {
+ return Add(Instruction.BitwiseNot, Local(op1.Type), op1);
+ }
+
+ public Operand BitwiseOr(Operand op1, Operand op2)
+ {
+ return Add(Instruction.BitwiseOr, Local(op1.Type), op1, op2);
+ }
+
+ public void Branch(Operand label)
+ {
+ NewNextBlockIfNeeded();
+
+ BranchToLabel(label, uncond: true, BasicBlockFrequency.Default);
+ }
+
+ public void BranchIf(Operand label, Operand op1, Operand op2, Comparison comp, BasicBlockFrequency falseFreq = default)
+ {
+ Add(Instruction.BranchIf, default, op1, op2, Const((int)comp));
+
+ BranchToLabel(label, uncond: false, falseFreq);
+ }
+
+ public void BranchIfFalse(Operand label, Operand op1, BasicBlockFrequency falseFreq = default)
+ {
+ BranchIf(label, op1, Const(op1.Type, 0), Comparison.Equal, falseFreq);
+ }
+
+ public void BranchIfTrue(Operand label, Operand op1, BasicBlockFrequency falseFreq = default)
+ {
+ BranchIf(label, op1, Const(op1.Type, 0), Comparison.NotEqual, falseFreq);
+ }
+
+ public Operand ByteSwap(Operand op1)
+ {
+ return Add(Instruction.ByteSwap, Local(op1.Type), op1);
+ }
+
+ public virtual Operand Call(MethodInfo info, params Operand[] callArgs)
+ {
+ IntPtr funcPtr = Delegates.GetDelegateFuncPtr(info);
+
+ OperandType returnType = GetOperandType(info.ReturnType);
+
+ Symbols.Add((ulong)funcPtr.ToInt64(), info.Name);
+
+ return Call(Const(funcPtr.ToInt64()), returnType, callArgs);
+ }
+
+ protected static OperandType GetOperandType(Type type)
+ {
+ if (type == typeof(bool) || type == typeof(byte) ||
+ type == typeof(char) || type == typeof(short) ||
+ type == typeof(int) || type == typeof(sbyte) ||
+ type == typeof(ushort) || type == typeof(uint))
+ {
+ return OperandType.I32;
+ }
+ else if (type == typeof(long) || type == typeof(ulong))
+ {
+ return OperandType.I64;
+ }
+ else if (type == typeof(double))
+ {
+ return OperandType.FP64;
+ }
+ else if (type == typeof(float))
+ {
+ return OperandType.FP32;
+ }
+ else if (type == typeof(V128))
+ {
+ return OperandType.V128;
+ }
+ else if (type == typeof(void))
+ {
+ return OperandType.None;
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid type \"{type.Name}\".");
+ }
+ }
+
+ public Operand Call(Operand address, OperandType returnType, params Operand[] callArgs)
+ {
+ Operand[] args = new Operand[callArgs.Length + 1];
+
+ args[0] = address;
+
+ Array.Copy(callArgs, 0, args, 1, callArgs.Length);
+
+ if (returnType != OperandType.None)
+ {
+ return Add(Instruction.Call, Local(returnType), args);
+ }
+ else
+ {
+ return Add(Instruction.Call, default, args);
+ }
+ }
+
+ public void Tailcall(Operand address, params Operand[] callArgs)
+ {
+ Operand[] args = new Operand[callArgs.Length + 1];
+
+ args[0] = address;
+
+ Array.Copy(callArgs, 0, args, 1, callArgs.Length);
+
+ Add(Instruction.Tailcall, default, args);
+
+ _needsNewBlock = true;
+ }
+
+ public Operand CompareAndSwap(Operand address, Operand expected, Operand desired)
+ {
+ return Add(Instruction.CompareAndSwap, Local(desired.Type), address, expected, desired);
+ }
+
+ public Operand CompareAndSwap16(Operand address, Operand expected, Operand desired)
+ {
+ return Add(Instruction.CompareAndSwap16, Local(OperandType.I32), address, expected, desired);
+ }
+
+ public Operand CompareAndSwap8(Operand address, Operand expected, Operand desired)
+ {
+ return Add(Instruction.CompareAndSwap8, Local(OperandType.I32), address, expected, desired);
+ }
+
+ public Operand ConditionalSelect(Operand op1, Operand op2, Operand op3)
+ {
+ return Add(Instruction.ConditionalSelect, Local(op2.Type), op1, op2, op3);
+ }
+
+ public Operand ConvertI64ToI32(Operand op1)
+ {
+ if (op1.Type != OperandType.I64)
+ {
+ throw new ArgumentException($"Invalid operand type \"{op1.Type}\".");
+ }
+
+ return Add(Instruction.ConvertI64ToI32, Local(OperandType.I32), op1);
+ }
+
+ public Operand ConvertToFP(OperandType type, Operand op1)
+ {
+ return Add(Instruction.ConvertToFP, Local(type), op1);
+ }
+
+ public Operand ConvertToFPUI(OperandType type, Operand op1)
+ {
+ return Add(Instruction.ConvertToFPUI, Local(type), op1);
+ }
+
+ public Operand Copy(Operand op1)
+ {
+ return Add(Instruction.Copy, Local(op1.Type), op1);
+ }
+
+ public Operand Copy(Operand dest, Operand op1)
+ {
+ if (dest.Kind != OperandKind.Register &&
+ (dest.Kind != OperandKind.LocalVariable || dest.GetLocalNumber() == 0))
+ {
+ throw new ArgumentException($"Destination operand must be a Register or a numbered LocalVariable.");
+ }
+
+ return Add(Instruction.Copy, dest, op1);
+ }
+
+ public Operand CountLeadingZeros(Operand op1)
+ {
+ return Add(Instruction.CountLeadingZeros, Local(op1.Type), op1);
+ }
+
+ public Operand Divide(Operand op1, Operand op2)
+ {
+ return Add(Instruction.Divide, Local(op1.Type), op1, op2);
+ }
+
+ public Operand DivideUI(Operand op1, Operand op2)
+ {
+ return Add(Instruction.DivideUI, Local(op1.Type), op1, op2);
+ }
+
+ public Operand ICompare(Operand op1, Operand op2, Comparison comp)
+ {
+ return Add(Instruction.Compare, Local(OperandType.I32), op1, op2, Const((int)comp));
+ }
+
+ public Operand ICompareEqual(Operand op1, Operand op2)
+ {
+ return ICompare(op1, op2, Comparison.Equal);
+ }
+
+ public Operand ICompareGreater(Operand op1, Operand op2)
+ {
+ return ICompare(op1, op2, Comparison.Greater);
+ }
+
+ public Operand ICompareGreaterOrEqual(Operand op1, Operand op2)
+ {
+ return ICompare(op1, op2, Comparison.GreaterOrEqual);
+ }
+
+ public Operand ICompareGreaterOrEqualUI(Operand op1, Operand op2)
+ {
+ return ICompare(op1, op2, Comparison.GreaterOrEqualUI);
+ }
+
+ public Operand ICompareGreaterUI(Operand op1, Operand op2)
+ {
+ return ICompare(op1, op2, Comparison.GreaterUI);
+ }
+
+ public Operand ICompareLess(Operand op1, Operand op2)
+ {
+ return ICompare(op1, op2, Comparison.Less);
+ }
+
+ public Operand ICompareLessOrEqual(Operand op1, Operand op2)
+ {
+ return ICompare(op1, op2, Comparison.LessOrEqual);
+ }
+
+ public Operand ICompareLessOrEqualUI(Operand op1, Operand op2)
+ {
+ return ICompare(op1, op2, Comparison.LessOrEqualUI);
+ }
+
+ public Operand ICompareLessUI(Operand op1, Operand op2)
+ {
+ return ICompare(op1, op2, Comparison.LessUI);
+ }
+
+ public Operand ICompareNotEqual(Operand op1, Operand op2)
+ {
+ return ICompare(op1, op2, Comparison.NotEqual);
+ }
+
+ public Operand Load(OperandType type, Operand address)
+ {
+ return Add(Instruction.Load, Local(type), address);
+ }
+
+ public Operand Load16(Operand address)
+ {
+ return Add(Instruction.Load16, Local(OperandType.I32), address);
+ }
+
+ public Operand Load8(Operand address)
+ {
+ return Add(Instruction.Load8, Local(OperandType.I32), address);
+ }
+
+ public Operand LoadArgument(OperandType type, int index)
+ {
+ return Add(Instruction.LoadArgument, Local(type), Const(index));
+ }
+
+ public void LoadFromContext()
+ {
+ _needsNewBlock = true;
+
+ Add(Instruction.LoadFromContext);
+ }
+
+ public void MemoryBarrier()
+ {
+ Add(Instruction.MemoryBarrier);
+ }
+
+ public Operand Multiply(Operand op1, Operand op2)
+ {
+ return Add(Instruction.Multiply, Local(op1.Type), op1, op2);
+ }
+
+ public Operand Multiply64HighSI(Operand op1, Operand op2)
+ {
+ return Add(Instruction.Multiply64HighSI, Local(OperandType.I64), op1, op2);
+ }
+
+ public Operand Multiply64HighUI(Operand op1, Operand op2)
+ {
+ return Add(Instruction.Multiply64HighUI, Local(OperandType.I64), op1, op2);
+ }
+
+ public Operand Negate(Operand op1)
+ {
+ return Add(Instruction.Negate, Local(op1.Type), op1);
+ }
+
+ public void Return()
+ {
+ Add(Instruction.Return);
+
+ _needsNewBlock = true;
+ }
+
+ public void Return(Operand op1)
+ {
+ Add(Instruction.Return, default, op1);
+
+ _needsNewBlock = true;
+ }
+
+ public Operand RotateRight(Operand op1, Operand op2)
+ {
+ return Add(Instruction.RotateRight, Local(op1.Type), op1, op2);
+ }
+
+ public Operand ShiftLeft(Operand op1, Operand op2)
+ {
+ return Add(Instruction.ShiftLeft, Local(op1.Type), op1, op2);
+ }
+
+ public Operand ShiftRightSI(Operand op1, Operand op2)
+ {
+ return Add(Instruction.ShiftRightSI, Local(op1.Type), op1, op2);
+ }
+
+ public Operand ShiftRightUI(Operand op1, Operand op2)
+ {
+ return Add(Instruction.ShiftRightUI, Local(op1.Type), op1, op2);
+ }
+
+ public Operand SignExtend16(OperandType type, Operand op1)
+ {
+ return Add(Instruction.SignExtend16, Local(type), op1);
+ }
+
+ public Operand SignExtend32(OperandType type, Operand op1)
+ {
+ return Add(Instruction.SignExtend32, Local(type), op1);
+ }
+
+ public Operand SignExtend8(OperandType type, Operand op1)
+ {
+ return Add(Instruction.SignExtend8, Local(type), op1);
+ }
+
+ public void Store(Operand address, Operand value)
+ {
+ Add(Instruction.Store, default, address, value);
+ }
+
+ public void Store16(Operand address, Operand value)
+ {
+ Add(Instruction.Store16, default, address, value);
+ }
+
+ public void Store8(Operand address, Operand value)
+ {
+ Add(Instruction.Store8, default, address, value);
+ }
+
+ public void StoreToContext()
+ {
+ Add(Instruction.StoreToContext);
+
+ _needsNewBlock = true;
+ }
+
+ public Operand Subtract(Operand op1, Operand op2)
+ {
+ return Add(Instruction.Subtract, Local(op1.Type), op1, op2);
+ }
+
+ public Operand VectorCreateScalar(Operand value)
+ {
+ return Add(Instruction.VectorCreateScalar, Local(OperandType.V128), value);
+ }
+
+ public Operand VectorExtract(OperandType type, Operand vector, int index)
+ {
+ return Add(Instruction.VectorExtract, Local(type), vector, Const(index));
+ }
+
+ public Operand VectorExtract16(Operand vector, int index)
+ {
+ return Add(Instruction.VectorExtract16, Local(OperandType.I32), vector, Const(index));
+ }
+
+ public Operand VectorExtract8(Operand vector, int index)
+ {
+ return Add(Instruction.VectorExtract8, Local(OperandType.I32), vector, Const(index));
+ }
+
+ public Operand VectorInsert(Operand vector, Operand value, int index)
+ {
+ return Add(Instruction.VectorInsert, Local(OperandType.V128), vector, value, Const(index));
+ }
+
+ public Operand VectorInsert16(Operand vector, Operand value, int index)
+ {
+ return Add(Instruction.VectorInsert16, Local(OperandType.V128), vector, value, Const(index));
+ }
+
+ public Operand VectorInsert8(Operand vector, Operand value, int index)
+ {
+ return Add(Instruction.VectorInsert8, Local(OperandType.V128), vector, value, Const(index));
+ }
+
+ public Operand VectorOne()
+ {
+ return Add(Instruction.VectorOne, Local(OperandType.V128));
+ }
+
+ public Operand VectorZero()
+ {
+ return Add(Instruction.VectorZero, Local(OperandType.V128));
+ }
+
+ public Operand VectorZeroUpper64(Operand vector)
+ {
+ return Add(Instruction.VectorZeroUpper64, Local(OperandType.V128), vector);
+ }
+
+ public Operand VectorZeroUpper96(Operand vector)
+ {
+ return Add(Instruction.VectorZeroUpper96, Local(OperandType.V128), vector);
+ }
+
+ public Operand ZeroExtend16(OperandType type, Operand op1)
+ {
+ return Add(Instruction.ZeroExtend16, Local(type), op1);
+ }
+
+ public Operand ZeroExtend32(OperandType type, Operand op1)
+ {
+ return Add(Instruction.ZeroExtend32, Local(type), op1);
+ }
+
+ public Operand ZeroExtend8(OperandType type, Operand op1)
+ {
+ return Add(Instruction.ZeroExtend8, Local(type), op1);
+ }
+
+ private void NewNextBlockIfNeeded()
+ {
+ if (_needsNewBlock)
+ {
+ NewNextBlock();
+ }
+ }
+
+ private Operand Add(Instruction inst, Operand dest = default)
+ {
+ NewNextBlockIfNeeded();
+
+ Operation operation = Operation.Factory.Operation(inst, dest);
+
+ _irBlock.Operations.AddLast(operation);
+
+ return dest;
+ }
+
+ private Operand Add(Instruction inst, Operand dest, Operand[] sources)
+ {
+ NewNextBlockIfNeeded();
+
+ Operation operation = Operation.Factory.Operation(inst, dest, sources);
+
+ _irBlock.Operations.AddLast(operation);
+
+ return dest;
+ }
+
+ private Operand Add(Instruction inst, Operand dest, Operand source0)
+ {
+ NewNextBlockIfNeeded();
+
+ Operation operation = Operation.Factory.Operation(inst, dest, source0);
+
+ _irBlock.Operations.AddLast(operation);
+
+ return dest;
+ }
+
+ private Operand Add(Instruction inst, Operand dest, Operand source0, Operand source1)
+ {
+ NewNextBlockIfNeeded();
+
+ Operation operation = Operation.Factory.Operation(inst, dest, source0, source1);
+
+ _irBlock.Operations.AddLast(operation);
+
+ return dest;
+ }
+
+ private Operand Add(Instruction inst, Operand dest, Operand source0, Operand source1, Operand source2)
+ {
+ NewNextBlockIfNeeded();
+
+ Operation operation = Operation.Factory.Operation(inst, dest, source0, source1, source2);
+
+ _irBlock.Operations.AddLast(operation);
+
+ return dest;
+ }
+
+ public Operand AddIntrinsic(Intrinsic intrin, params Operand[] args)
+ {
+ return Add(intrin, Local(OperandType.V128), args);
+ }
+
+ public Operand AddIntrinsicInt(Intrinsic intrin, params Operand[] args)
+ {
+ return Add(intrin, Local(OperandType.I32), args);
+ }
+
+ public Operand AddIntrinsicLong(Intrinsic intrin, params Operand[] args)
+ {
+ return Add(intrin, Local(OperandType.I64), args);
+ }
+
+ public void AddIntrinsicNoRet(Intrinsic intrin, params Operand[] args)
+ {
+ Add(intrin, default, args);
+ }
+
+ private Operand Add(Intrinsic intrin, Operand dest, params Operand[] sources)
+ {
+ NewNextBlockIfNeeded();
+
+ Operation operation = Operation.Factory.Operation(intrin, dest, sources);
+
+ _irBlock.Operations.AddLast(operation);
+
+ return dest;
+ }
+
+ private void BranchToLabel(Operand label, bool uncond, BasicBlockFrequency nextFreq)
+ {
+ if (!_irLabels.TryGetValue(label, out BasicBlock branchBlock))
+ {
+ branchBlock = new BasicBlock();
+
+ _irLabels.Add(label, branchBlock);
+ }
+
+ if (uncond)
+ {
+ _irBlock.AddSuccessor(branchBlock);
+ }
+ else
+ {
+ // Defer registration of successor to _irBlock so that the order of successors is correct.
+ _ifBlock = branchBlock;
+ }
+
+ _needsNewBlock = true;
+ _nextBlockFreq = nextFreq;
+ }
+
+ public void MarkLabel(Operand label, BasicBlockFrequency nextFreq = default)
+ {
+ _nextBlockFreq = nextFreq;
+
+ if (_irLabels.TryGetValue(label, out BasicBlock nextBlock))
+ {
+ nextBlock.Index = _irBlocks.Count;
+
+ _irBlocks.AddLast(nextBlock);
+
+ NextBlock(nextBlock);
+ }
+ else
+ {
+ NewNextBlock();
+
+ _irLabels.Add(label, _irBlock);
+ }
+ }
+
+ private void NewNextBlock()
+ {
+ BasicBlock block = new BasicBlock(_irBlocks.Count);
+
+ _irBlocks.AddLast(block);
+
+ NextBlock(block);
+ }
+
+ private void NextBlock(BasicBlock nextBlock)
+ {
+ if (_irBlock?.SuccessorsCount == 0 && !EndsWithUnconditional(_irBlock))
+ {
+ _irBlock.AddSuccessor(nextBlock);
+
+ if (_ifBlock != null)
+ {
+ _irBlock.AddSuccessor(_ifBlock);
+
+ _ifBlock = null;
+ }
+ }
+
+ _irBlock = nextBlock;
+ _irBlock.Frequency = _nextBlockFreq;
+
+ _needsNewBlock = false;
+ _nextBlockFreq = BasicBlockFrequency.Default;
+ }
+
+ private static bool EndsWithUnconditional(BasicBlock block)
+ {
+ Operation last = block.Operations.Last;
+
+ return last != default &&
+ (last.Instruction == Instruction.Return ||
+ last.Instruction == Instruction.Tailcall);
+ }
+
+ public ControlFlowGraph GetControlFlowGraph()
+ {
+ return new ControlFlowGraph(_irBlocks.First, _irBlocks, _localsCount);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Translation/GuestFunction.cs b/src/ARMeilleure/Translation/GuestFunction.cs
new file mode 100644
index 00000000..ac131a0d
--- /dev/null
+++ b/src/ARMeilleure/Translation/GuestFunction.cs
@@ -0,0 +1,6 @@
+using System;
+
+namespace ARMeilleure.Translation
+{
+ delegate ulong GuestFunction(IntPtr nativeContextPtr);
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/IntervalTree.cs b/src/ARMeilleure/Translation/IntervalTree.cs
new file mode 100644
index 00000000..9af01bea
--- /dev/null
+++ b/src/ARMeilleure/Translation/IntervalTree.cs
@@ -0,0 +1,745 @@
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Translation
+{
+ /// <summary>
+ /// An augmented interval tree based on the "TreeDictionary"'s red-black tree. Allows fast overlap checking of ranges.
+ /// </summary>
+ /// <typeparam name="K">Key</typeparam>
+ /// <typeparam name="V">Value</typeparam>
+ class IntervalTree<K, V> where K : IComparable<K>
+ {
+ private const int ArrayGrowthSize = 32;
+
+ private const bool Black = true;
+ private const bool Red = false;
+ private IntervalTreeNode<K, V> _root = null;
+ private int _count = 0;
+
+ public int Count => _count;
+
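+ // Each node additionally stores Max, the largest End in its subtree, letting
+ // queries prune any subtree whose Max does not reach past the query start.
+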
+ #region Public Methods
+
+ /// <summary>
+ /// Gets the value of the interval whose key is <paramref name="key"/>.
+ /// </summary>
+ /// <param name="key">Key of the node value to get</param>
+ /// <param name="value">Value with the given <paramref name="key"/></param>
+ /// <returns>True if the key is in the tree, false otherwise</returns>
+ public bool TryGet(K key, out V value)
+ {
+ IntervalTreeNode<K, V> node = GetNode(key);
+
+ if (node == null)
+ {
+ value = default;
+ return false;
+ }
+
+ value = node.Value;
+ return true;
+ }
+
+ /// <summary>
+ /// Returns the start keys of the intervals whose ranges overlap the given range.
+ /// </summary>
+ /// <param name="start">Start of the range</param>
+ /// <param name="end">End of the range</param>
+ /// <param name="overlaps">Overlaps array to place results in</param>
+ /// <param name="overlapCount">Index to start writing results into the array. Defaults to 0</param>
+ /// <returns>Number of intervals found</returns>
+ public int Get(K start, K end, ref K[] overlaps, int overlapCount = 0)
+ {
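+ // Illustrative call (assuming K = ulong): K[] hits = new K[1]; Get(0, 16, ref hits)
+ // grows the array as needed and returns the number of keys written.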
+ GetKeys(_root, start, end, ref overlaps, ref overlapCount);
+
+ return overlapCount;
+ }
+
+ /// <summary>
+ /// Adds a new interval into the tree whose start is <paramref name="start"/>, end is <paramref name="end"/> and value is <paramref name="value"/>.
+ /// </summary>
+ /// <param name="start">Start of the range to add</param>
+ /// <param name="end">End of the range to insert</param>
+ /// <param name="value">Value to add</param>
+ /// <param name="updateFactoryCallback">Optional factory used to create a new value if <paramref name="start"/> is already on the tree</param>
+ /// <exception cref="ArgumentNullException"><paramref name="value"/> is null</exception>
+ /// <returns>True if the value was added, false if the start key was already in the dictionary</returns>
+ public bool AddOrUpdate(K start, K end, V value, Func<K, V, V> updateFactoryCallback)
+ {
+ ArgumentNullException.ThrowIfNull(value);
+
+ return BSTInsert(start, end, value, updateFactoryCallback, out IntervalTreeNode<K, V> node);
+ }
+
+ /// <summary>
+ /// Gets an existing or adds a new interval into the tree whose start is <paramref name="start"/>, end is <paramref name="end"/> and value is <paramref name="value"/>.
+ /// </summary>
+ /// <param name="start">Start of the range to add</param>
+ /// <param name="end">End of the range to insert</param>
+ /// <param name="value">Value to add</param>
+ /// <exception cref="ArgumentNullException"><paramref name="value"/> is null</exception>
+ /// <returns><paramref name="value"/> if <paramref name="start"/> is not yet on the tree, or the existing value otherwise</returns>
+ public V GetOrAdd(K start, K end, V value)
+ {
+ ArgumentNullException.ThrowIfNull(value);
+
+ BSTInsert(start, end, value, null, out IntervalTreeNode<K, V> node);
+ return node.Value;
+ }
+
+ /// <summary>
+ /// Removes a value from the tree, searching for it with <paramref name="key"/>.
+ /// </summary>
+ /// <param name="key">Key of the node to remove</param>
+ /// <returns>Number of deleted values</returns>
+ public int Remove(K key)
+ {
+ int removed = Delete(key);
+
+ _count -= removed;
+
+ return removed;
+ }
+
+ /// <summary>
+ /// Collects every value in the tree, in key order.
+ /// </summary>
+ /// <returns>A list of all values sorted by key order</returns>
+ public List<V> AsList()
+ {
+ List<V> list = new List<V>();
+
+ AddToList(_root, list);
+
+ return list;
+ }
+
+ #endregion
+
+ #region Private Methods (BST)
+
+ /// <summary>
+ /// Adds all values that are children of or contained within <paramref name="node"/> into <paramref name="list"/>, in key order.
+ /// </summary>
+ /// <param name="node">The node to search for values within</param>
+ /// <param name="list">The list to add values to</param>
+ private void AddToList(IntervalTreeNode<K, V> node, List<V> list)
+ {
+ if (node == null)
+ {
+ return;
+ }
+
+ AddToList(node.Left, list);
+
+ list.Add(node.Value);
+
+ AddToList(node.Right, list);
+ }
+
+ /// <summary>
+ /// Retrieve the node reference whose key is <paramref name="key"/>, or null if no such node exists.
+ /// </summary>
+ /// <param name="key">Key of the node to get</param>
+ /// <exception cref="ArgumentNullException"><paramref name="key"/> is null</exception>
+ /// <returns>Node reference in the tree</returns>
+ private IntervalTreeNode<K, V> GetNode(K key)
+ {
+ ArgumentNullException.ThrowIfNull(key);
+
+ IntervalTreeNode<K, V> node = _root;
+ while (node != null)
+ {
+ int cmp = key.CompareTo(node.Start);
+ if (cmp < 0)
+ {
+ node = node.Left;
+ }
+ else if (cmp > 0)
+ {
+ node = node.Right;
+ }
+ else
+ {
+ return node;
+ }
+ }
+ return null;
+ }
+
+ /// <summary>
+ /// Retrieve all keys under <paramref name="node"/> that overlap the given start and end keys.
+ /// </summary>
+ /// <param name="node">Subtree root to search from</param>
+ /// <param name="start">Start of the range</param>
+ /// <param name="end">End of the range</param>
+ /// <param name="overlaps">Overlaps array to place results in</param>
+ /// <param name="overlapCount">Overlaps count to update</param>
+ private void GetKeys(IntervalTreeNode<K, V> node, K start, K end, ref K[] overlaps, ref int overlapCount)
+ {
+ if (node == null || start.CompareTo(node.Max) >= 0)
+ {
+ return;
+ }
+
+ GetKeys(node.Left, start, end, ref overlaps, ref overlapCount);
+
+ bool endsOnRight = end.CompareTo(node.Start) > 0;
+ if (endsOnRight)
+ {
+ if (start.CompareTo(node.End) < 0)
+ {
+ if (overlaps.Length <= overlapCount) // Grow only when the array is full.
+ {
+ Array.Resize(ref overlaps, overlapCount + ArrayGrowthSize);
+ }
+
+ overlaps[overlapCount++] = node.Start;
+ }
+
+ GetKeys(node.Right, start, end, ref overlaps, ref overlapCount);
+ }
+ }
+
+ /// <summary>
+ /// Propagate an increase in max value starting at the given node, heading up the tree.
+ /// This should only be called if the max increases - not for rebalancing or removals.
+ /// </summary>
+ /// <param name="node">The node to start propagating from</param>
+ private void PropagateIncrease(IntervalTreeNode<K, V> node)
+ {
+ K max = node.Max;
+ IntervalTreeNode<K, V> ptr = node;
+
+ while ((ptr = ptr.Parent) != null)
+ {
+ if (max.CompareTo(ptr.Max) > 0)
+ {
+ ptr.Max = max;
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Propagate recalculating max value starting at the given node, heading up the tree.
+ /// This fully recalculates the max value from all children when there is potential for it to decrease.
+ /// </summary>
+ /// <param name="node">The node to start propagating from</param>
+ private void PropagateFull(IntervalTreeNode<K, V> node)
+ {
+ IntervalTreeNode<K, V> ptr = node;
+
+ do
+ {
+ K max = ptr.End;
+
+ if (ptr.Left != null && ptr.Left.Max.CompareTo(max) > 0)
+ {
+ max = ptr.Left.Max;
+ }
+
+ if (ptr.Right != null && ptr.Right.Max.CompareTo(max) > 0)
+ {
+ max = ptr.Right.Max;
+ }
+
+ ptr.Max = max;
+ } while ((ptr = ptr.Parent) != null);
+ }
+
+ /// <summary>
+ /// Insertion Mechanism for the interval tree. Similar to a BST insert, with the start of the range as the key.
+ /// Iterates the tree starting from the root and inserts a new node where all children in the left subtree are less than <paramref name="start"/>, and all children in the right subtree are greater than <paramref name="start"/>.
+ /// Each node can contain multiple values, and has an end address which is the maximum of all those values.
+ /// Post insertion, the "max" value of the node and all parents are updated.
+ /// </summary>
+ /// <param name="start">Start of the range to insert</param>
+ /// <param name="end">End of the range to insert</param>
+ /// <param name="value">Value to insert</param>
+ /// <param name="updateFactoryCallback">Optional factory used to create a new value if <paramref name="start"/> is already on the tree</param>
+ /// <param name="outNode">Node that was inserted or modified</param>
+ /// <returns>True if <paramref name="start"/> was not yet in the tree, false otherwise</returns>
+ private bool BSTInsert(K start, K end, V value, Func<K, V, V> updateFactoryCallback, out IntervalTreeNode<K, V> outNode)
+ {
+ IntervalTreeNode<K, V> parent = null;
+ IntervalTreeNode<K, V> node = _root;
+
+ while (node != null)
+ {
+ parent = node;
+ int cmp = start.CompareTo(node.Start);
+ if (cmp < 0)
+ {
+ node = node.Left;
+ }
+ else if (cmp > 0)
+ {
+ node = node.Right;
+ }
+ else
+ {
+ outNode = node;
+
+ if (updateFactoryCallback != null)
+ {
+ // Replace
+ node.Value = updateFactoryCallback(start, node.Value);
+
+ int endCmp = end.CompareTo(node.End);
+
+ if (endCmp > 0)
+ {
+ node.End = end;
+ if (end.CompareTo(node.Max) > 0)
+ {
+ node.Max = end;
+ // Only the Max augmentation changed; the tree structure is untouched,
+ // so no rebalancing is needed (recoloring an existing node via the
+ // insertion fixup would break the red-black invariants).
+ PropagateIncrease(node);
+ }
+ }
+ else if (endCmp < 0)
+ {
+ node.End = end;
+ PropagateFull(node);
+ }
+ }
+
+ return false;
+ }
+ }
+ IntervalTreeNode<K, V> newNode = new IntervalTreeNode<K, V>(start, end, value, parent);
+ if (newNode.Parent == null)
+ {
+ _root = newNode;
+ }
+ else if (start.CompareTo(parent.Start) < 0)
+ {
+ parent.Left = newNode;
+ }
+ else
+ {
+ parent.Right = newNode;
+ }
+
+ PropagateIncrease(newNode);
+ _count++;
+ RestoreBalanceAfterInsertion(newNode);
+ outNode = newNode;
+ return true;
+ }
+
+ /// <summary>
+ /// Removes the value from the dictionary after searching for it with <paramref name="key"/>.
+ /// </summary>
+ /// <param name="key">Key to search for</param>
+ /// <returns>Number of deleted values</returns>
+ private int Delete(K key)
+ {
+ IntervalTreeNode<K, V> nodeToDelete = GetNode(key);
+
+ if (nodeToDelete == null)
+ {
+ return 0;
+ }
+
+ IntervalTreeNode<K, V> replacementNode;
+
+ if (LeftOf(nodeToDelete) == null || RightOf(nodeToDelete) == null)
+ {
+ replacementNode = nodeToDelete;
+ }
+ else
+ {
+ replacementNode = PredecessorOf(nodeToDelete);
+ }
+
+ IntervalTreeNode<K, V> tmp = LeftOf(replacementNode) ?? RightOf(replacementNode);
+
+ if (tmp != null)
+ {
+ tmp.Parent = ParentOf(replacementNode);
+ }
+
+ if (ParentOf(replacementNode) == null)
+ {
+ _root = tmp;
+ }
+ else if (replacementNode == LeftOf(ParentOf(replacementNode)))
+ {
+ ParentOf(replacementNode).Left = tmp;
+ }
+ else
+ {
+ ParentOf(replacementNode).Right = tmp;
+ }
+
+ if (replacementNode != nodeToDelete)
+ {
+ nodeToDelete.Start = replacementNode.Start;
+ nodeToDelete.Value = replacementNode.Value;
+ nodeToDelete.End = replacementNode.End;
+ nodeToDelete.Max = replacementNode.Max;
+ }
+
+ PropagateFull(replacementNode);
+
+ if (tmp != null && ColorOf(replacementNode) == Black)
+ {
+ RestoreBalanceAfterRemoval(tmp);
+ }
+
+ return 1;
+ }
+
+ /// <summary>
+ /// Returns the node with the largest key where <paramref name="node"/> is considered the root node.
+ /// </summary>
+ /// <param name="node">Root Node</param>
+ /// <returns>Node with the maximum key in the tree of <paramref name="node"/></returns>
+ private static IntervalTreeNode<K, V> Maximum(IntervalTreeNode<K, V> node)
+ {
+ IntervalTreeNode<K, V> tmp = node;
+ while (tmp.Right != null)
+ {
+ tmp = tmp.Right;
+ }
+
+ return tmp;
+ }
+
+ /// <summary>
+ /// Finds the node whose key immediately precedes that of <paramref name="node"/>.
+ /// </summary>
+ /// <param name="node">Node to find the predecessor of</param>
+ /// <returns>Predecessor of <paramref name="node"/></returns>
+ private static IntervalTreeNode<K, V> PredecessorOf(IntervalTreeNode<K, V> node)
+ {
+ if (node.Left != null)
+ {
+ return Maximum(node.Left);
+ }
+ IntervalTreeNode<K, V> parent = node.Parent;
+ while (parent != null && node == parent.Left)
+ {
+ node = parent;
+ parent = parent.Parent;
+ }
+ return parent;
+ }
+
+ #endregion
+
+ #region Private Methods (RBL)
+
+ private void RestoreBalanceAfterRemoval(IntervalTreeNode<K, V> balanceNode)
+ {
+ IntervalTreeNode<K, V> ptr = balanceNode;
+
+ while (ptr != _root && ColorOf(ptr) == Black)
+ {
+ if (ptr == LeftOf(ParentOf(ptr)))
+ {
+ IntervalTreeNode<K, V> sibling = RightOf(ParentOf(ptr));
+
+ if (ColorOf(sibling) == Red)
+ {
+ SetColor(sibling, Black);
+ SetColor(ParentOf(ptr), Red);
+ RotateLeft(ParentOf(ptr));
+ sibling = RightOf(ParentOf(ptr));
+ }
+ if (ColorOf(LeftOf(sibling)) == Black && ColorOf(RightOf(sibling)) == Black)
+ {
+ SetColor(sibling, Red);
+ ptr = ParentOf(ptr);
+ }
+ else
+ {
+ if (ColorOf(RightOf(sibling)) == Black)
+ {
+ SetColor(LeftOf(sibling), Black);
+ SetColor(sibling, Red);
+ RotateRight(sibling);
+ sibling = RightOf(ParentOf(ptr));
+ }
+ SetColor(sibling, ColorOf(ParentOf(ptr)));
+ SetColor(ParentOf(ptr), Black);
+ SetColor(RightOf(sibling), Black);
+ RotateLeft(ParentOf(ptr));
+ ptr = _root;
+ }
+ }
+ else
+ {
+ IntervalTreeNode<K, V> sibling = LeftOf(ParentOf(ptr));
+
+ if (ColorOf(sibling) == Red)
+ {
+ SetColor(sibling, Black);
+ SetColor(ParentOf(ptr), Red);
+ RotateRight(ParentOf(ptr));
+ sibling = LeftOf(ParentOf(ptr));
+ }
+ if (ColorOf(RightOf(sibling)) == Black && ColorOf(LeftOf(sibling)) == Black)
+ {
+ SetColor(sibling, Red);
+ ptr = ParentOf(ptr);
+ }
+ else
+ {
+ if (ColorOf(LeftOf(sibling)) == Black)
+ {
+ SetColor(RightOf(sibling), Black);
+ SetColor(sibling, Red);
+ RotateLeft(sibling);
+ sibling = LeftOf(ParentOf(ptr));
+ }
+ SetColor(sibling, ColorOf(ParentOf(ptr)));
+ SetColor(ParentOf(ptr), Black);
+ SetColor(LeftOf(sibling), Black);
+ RotateRight(ParentOf(ptr));
+ ptr = _root;
+ }
+ }
+ }
+ SetColor(ptr, Black);
+ }
+
+ private void RestoreBalanceAfterInsertion(IntervalTreeNode<K, V> balanceNode)
+ {
+ SetColor(balanceNode, Red);
+ while (balanceNode != null && balanceNode != _root && ColorOf(ParentOf(balanceNode)) == Red)
+ {
+ if (ParentOf(balanceNode) == LeftOf(ParentOf(ParentOf(balanceNode))))
+ {
+ IntervalTreeNode<K, V> sibling = RightOf(ParentOf(ParentOf(balanceNode)));
+
+ if (ColorOf(sibling) == Red)
+ {
+ SetColor(ParentOf(balanceNode), Black);
+ SetColor(sibling, Black);
+ SetColor(ParentOf(ParentOf(balanceNode)), Red);
+ balanceNode = ParentOf(ParentOf(balanceNode));
+ }
+ else
+ {
+ if (balanceNode == RightOf(ParentOf(balanceNode)))
+ {
+ balanceNode = ParentOf(balanceNode);
+ RotateLeft(balanceNode);
+ }
+ SetColor(ParentOf(balanceNode), Black);
+ SetColor(ParentOf(ParentOf(balanceNode)), Red);
+ RotateRight(ParentOf(ParentOf(balanceNode)));
+ }
+ }
+ else
+ {
+ IntervalTreeNode<K, V> sibling = LeftOf(ParentOf(ParentOf(balanceNode)));
+
+ if (ColorOf(sibling) == Red)
+ {
+ SetColor(ParentOf(balanceNode), Black);
+ SetColor(sibling, Black);
+ SetColor(ParentOf(ParentOf(balanceNode)), Red);
+ balanceNode = ParentOf(ParentOf(balanceNode));
+ }
+ else
+ {
+ if (balanceNode == LeftOf(ParentOf(balanceNode)))
+ {
+ balanceNode = ParentOf(balanceNode);
+ RotateRight(balanceNode);
+ }
+ SetColor(ParentOf(balanceNode), Black);
+ SetColor(ParentOf(ParentOf(balanceNode)), Red);
+ RotateLeft(ParentOf(ParentOf(balanceNode)));
+ }
+ }
+ }
+ SetColor(_root, Black);
+ }
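+
+ // The two restore methods above are the textbook red-black fixups (case analysis
+ // on the sibling/uncle color); they need no interval-specific bookkeeping because
+ // RotateLeft and RotateRight below call PropagateFull to recompute Max values.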
+
+ private void RotateLeft(IntervalTreeNode<K, V> node)
+ {
+ if (node != null)
+ {
+ IntervalTreeNode<K, V> right = RightOf(node);
+ node.Right = LeftOf(right);
+ if (node.Right != null)
+ {
+ node.Right.Parent = node;
+ }
+ IntervalTreeNode<K, V> nodeParent = ParentOf(node);
+ right.Parent = nodeParent;
+ if (nodeParent == null)
+ {
+ _root = right;
+ }
+ else if (node == LeftOf(nodeParent))
+ {
+ nodeParent.Left = right;
+ }
+ else
+ {
+ nodeParent.Right = right;
+ }
+ right.Left = node;
+ node.Parent = right;
+
+ PropagateFull(node);
+ }
+ }
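+
+ // RotateLeft pivots around node and its right child: the child takes node's
+ // place, node becomes its left child, and the child's former left subtree is
+ // reattached as node's right subtree. In-order key order is preserved, and the
+ // trailing PropagateFull refreshes the Max values invalidated by the move.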
+
+ private void RotateRight(IntervalTreeNode<K, V> node)
+ {
+ if (node != null)
+ {
+ IntervalTreeNode<K, V> left = LeftOf(node);
+ node.Left = RightOf(left);
+ if (node.Left != null)
+ {
+ node.Left.Parent = node;
+ }
+ IntervalTreeNode<K, V> nodeParent = ParentOf(node);
+ left.Parent = nodeParent;
+ if (nodeParent == null)
+ {
+ _root = left;
+ }
+ else if (node == RightOf(nodeParent))
+ {
+ nodeParent.Right = left;
+ }
+ else
+ {
+ nodeParent.Left = left;
+ }
+ left.Right = node;
+ node.Parent = left;
+
+ PropagateFull(node);
+ }
+ }
+
+ #endregion
+
+ #region Safety-Methods
+
+ // These methods save memory by allowing us to forgo sentinel nil nodes, and they also guard against NullReferenceExceptions.
+
+ /// <summary>
+ /// Returns the color of <paramref name="node"/>, or Black if it is null.
+ /// </summary>
+ /// <param name="node">Node</param>
+ /// <returns>The boolean color of <paramref name="node"/>, or black if null</returns>
+ private static bool ColorOf(IntervalTreeNode<K, V> node)
+ {
+ return node == null || node.Color;
+ }
+
+ /// <summary>
+ /// Sets the color of <paramref name="node"/> to <paramref name="color"/>.
+ /// <br></br>
+ /// This method does nothing if <paramref name="node"/> is null.
+ /// </summary>
+ /// <param name="node">Node to set the color of</param>
+ /// <param name="color">Color (Boolean)</param>
+ private static void SetColor(IntervalTreeNode<K, V> node, bool color)
+ {
+ if (node != null)
+ {
+ node.Color = color;
+ }
+ }
+
+ /// <summary>
+ /// Returns the left child of <paramref name="node"/>, or null if <paramref name="node"/> is null.
+ /// </summary>
+ /// <param name="node">Node to retrieve the left child from</param>
+ /// <returns>Left child of <paramref name="node"/></returns>
+ private static IntervalTreeNode<K, V> LeftOf(IntervalTreeNode<K, V> node)
+ {
+ return node?.Left;
+ }
+
+ /// <summary>
+ /// Returns the right child of <paramref name="node"/>, or null if <paramref name="node"/> is null.
+ /// </summary>
+ /// <param name="node">Node to retrieve the right child from</param>
+ /// <returns>Right child of <paramref name="node"/></returns>
+ private static IntervalTreeNode<K, V> RightOf(IntervalTreeNode<K, V> node)
+ {
+ return node?.Right;
+ }
+
+ /// <summary>
+ /// Returns the parent node of <paramref name="node"/>, or null if <paramref name="node"/> is null.
+ /// </summary>
+ /// <param name="node">Node to retrieve the parent from</param>
+ /// <returns>Parent of <paramref name="node"/></returns>
+ private static IntervalTreeNode<K, V> ParentOf(IntervalTreeNode<K, V> node)
+ {
+ return node?.Parent;
+ }
+
+ #endregion
+
+ public bool ContainsKey(K key)
+ {
+ return GetNode(key) != null;
+ }
+
+ public void Clear()
+ {
+ _root = null;
+ _count = 0;
+ }
+ }
+
+ /// <summary>
+ /// Represents a node in the IntervalTree which contains start and end keys of type K, and a value of generic type V.
+ /// </summary>
+ /// <typeparam name="K">Key type of the node</typeparam>
+ /// <typeparam name="V">Value type of the node</typeparam>
+ class IntervalTreeNode<K, V>
+ {
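+ // Color is a boolean matching the Red/Black constants used by the tree above;
+ // null children count as Black (see ColorOf), and RestoreBalanceAfterInsertion
+ // recolors a freshly inserted node Red before rebalancing.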
+ public bool Color = true;
+ public IntervalTreeNode<K, V> Left = null;
+ public IntervalTreeNode<K, V> Right = null;
+ public IntervalTreeNode<K, V> Parent = null;
+
+ /// <summary>
+ /// The start of the range.
+ /// </summary>
+ public K Start;
+
+ /// <summary>
+ /// The end of the range.
+ /// </summary>
+ public K End;
+
+ /// <summary>
+ /// The maximum end value of this node and all its children.
+ /// </summary>
+ public K Max;
+
+ /// <summary>
+ /// Value stored on this node.
+ /// </summary>
+ public V Value;
+
+ public IntervalTreeNode(K start, K end, V value, IntervalTreeNode<K, V> parent)
+ {
+ Start = start;
+ End = end;
+ Max = end;
+ Value = value;
+ Parent = parent;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Translation/PTC/EncodingCache.cs b/src/ARMeilleure/Translation/PTC/EncodingCache.cs
new file mode 100644
index 00000000..90d40c47
--- /dev/null
+++ b/src/ARMeilleure/Translation/PTC/EncodingCache.cs
@@ -0,0 +1,9 @@
+using System.Text;
+
+namespace ARMeilleure.Translation.PTC
+{
+ static class EncodingCache
+ {
+ public static readonly Encoding UTF8NoBOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/PTC/IPtcLoadState.cs b/src/ARMeilleure/Translation/PTC/IPtcLoadState.cs
new file mode 100644
index 00000000..1b11ac0b
--- /dev/null
+++ b/src/ARMeilleure/Translation/PTC/IPtcLoadState.cs
@@ -0,0 +1,10 @@
+using System;
+
+namespace ARMeilleure.Translation.PTC
+{
+ public interface IPtcLoadState
+ {
+ event Action<PtcLoadingState, int, int> PtcStateChanged;
+ void Continue();
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs
new file mode 100644
index 00000000..ea4e715b
--- /dev/null
+++ b/src/ARMeilleure/Translation/PTC/Ptc.cs
@@ -0,0 +1,1131 @@
+using ARMeilleure.CodeGen;
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Memory;
+using Ryujinx.Common;
+using Ryujinx.Common.Configuration;
+using Ryujinx.Common.Logging;
+using Ryujinx.Common.Memory;
+using System;
+using System.Buffers.Binary;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.IO.Compression;
+using System.Runtime;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Threading;
+
+using static ARMeilleure.Translation.PTC.PtcFormatter;
+
+namespace ARMeilleure.Translation.PTC
+{
+ using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities;
+ using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities;
+
+ class Ptc : IPtcLoadState
+ {
+ private const string OuterHeaderMagicString = "PTCohd\0\0";
+ private const string InnerHeaderMagicString = "PTCihd\0\0";
+
+ private const uint InternalVersion = 4661; //! To be incremented manually for each change to the ARMeilleure project.
+
+ private const string ActualDir = "0";
+ private const string BackupDir = "1";
+
+ private const string TitleIdTextDefault = "0000000000000000";
+ private const string DisplayVersionDefault = "0";
+
+ public static readonly Symbol PageTableSymbol = new(SymbolType.Special, 1);
+ public static readonly Symbol CountTableSymbol = new(SymbolType.Special, 2);
+ public static readonly Symbol DispatchStubSymbol = new(SymbolType.Special, 3);
+
+ private const byte FillingByte = 0x00;
+ private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest;
+
+ public PtcProfiler Profiler { get; }
+
+ // Carriers.
+ private MemoryStream _infosStream;
+ private List<byte[]> _codesList;
+ private MemoryStream _relocsStream;
+ private MemoryStream _unwindInfosStream;
+
+ private readonly ulong _outerHeaderMagic;
+ private readonly ulong _innerHeaderMagic;
+
+ private readonly ManualResetEvent _waitEvent;
+
+ private readonly object _lock;
+
+ private bool _disposed;
+
+ public string TitleIdText { get; private set; }
+ public string DisplayVersion { get; private set; }
+
+ private MemoryManagerType _memoryMode;
+
+ public string CachePathActual { get; private set; }
+ public string CachePathBackup { get; private set; }
+
+ public PtcState State { get; private set; }
+
+ // Progress reporting helpers.
+ private volatile int _translateCount;
+ private volatile int _translateTotalCount;
+ public event Action<PtcLoadingState, int, int> PtcStateChanged;
+
+ public Ptc()
+ {
+ Profiler = new PtcProfiler(this);
+
+ InitializeCarriers();
+
+ _outerHeaderMagic = BinaryPrimitives.ReadUInt64LittleEndian(EncodingCache.UTF8NoBOM.GetBytes(OuterHeaderMagicString).AsSpan());
+ _innerHeaderMagic = BinaryPrimitives.ReadUInt64LittleEndian(EncodingCache.UTF8NoBOM.GetBytes(InnerHeaderMagicString).AsSpan());
+
+ _waitEvent = new ManualResetEvent(true);
+
+ _lock = new object();
+
+ _disposed = false;
+
+ TitleIdText = TitleIdTextDefault;
+ DisplayVersion = DisplayVersionDefault;
+
+ CachePathActual = string.Empty;
+ CachePathBackup = string.Empty;
+
+ Disable();
+ }
+
+ public void Initialize(string titleIdText, string displayVersion, bool enabled, MemoryManagerType memoryMode)
+ {
+ Wait();
+
+ Profiler.Wait();
+ Profiler.ClearEntries();
+
+ Logger.Info?.Print(LogClass.Ptc, $"Initializing Profiled Persistent Translation Cache (enabled: {enabled}).");
+
+ if (!enabled || string.IsNullOrEmpty(titleIdText) || titleIdText == TitleIdTextDefault)
+ {
+ TitleIdText = TitleIdTextDefault;
+ DisplayVersion = DisplayVersionDefault;
+
+ CachePathActual = string.Empty;
+ CachePathBackup = string.Empty;
+
+ Disable();
+
+ return;
+ }
+
+ TitleIdText = titleIdText;
+ DisplayVersion = !string.IsNullOrEmpty(displayVersion) ? displayVersion : DisplayVersionDefault;
+ _memoryMode = memoryMode;
+
+ string workPathActual = Path.Combine(AppDataManager.GamesDirPath, TitleIdText, "cache", "cpu", ActualDir);
+ string workPathBackup = Path.Combine(AppDataManager.GamesDirPath, TitleIdText, "cache", "cpu", BackupDir);
+
+ if (!Directory.Exists(workPathActual))
+ {
+ Directory.CreateDirectory(workPathActual);
+ }
+
+ if (!Directory.Exists(workPathBackup))
+ {
+ Directory.CreateDirectory(workPathBackup);
+ }
+
+ CachePathActual = Path.Combine(workPathActual, DisplayVersion);
+ CachePathBackup = Path.Combine(workPathBackup, DisplayVersion);
+
+ PreLoad();
+ Profiler.PreLoad();
+
+ Enable();
+ }
+
+ private void InitializeCarriers()
+ {
+ _infosStream = MemoryStreamManager.Shared.GetStream();
+ _codesList = new List<byte[]>();
+ _relocsStream = MemoryStreamManager.Shared.GetStream();
+ _unwindInfosStream = MemoryStreamManager.Shared.GetStream();
+ }
+
+ private void DisposeCarriers()
+ {
+ _infosStream.Dispose();
+ _codesList.Clear();
+ _relocsStream.Dispose();
+ _unwindInfosStream.Dispose();
+ }
+
+ private bool AreCarriersEmpty()
+ {
+ return _infosStream.Length == 0L && _codesList.Count == 0 && _relocsStream.Length == 0L && _unwindInfosStream.Length == 0L;
+ }
+
+ private void ResetCarriersIfNeeded()
+ {
+ if (AreCarriersEmpty())
+ {
+ return;
+ }
+
+ DisposeCarriers();
+
+ InitializeCarriers();
+ }
+
+ private void PreLoad()
+ {
+ string fileNameActual = $"{CachePathActual}.cache";
+ string fileNameBackup = $"{CachePathBackup}.cache";
+
+ FileInfo fileInfoActual = new FileInfo(fileNameActual);
+ FileInfo fileInfoBackup = new FileInfo(fileNameBackup);
+
+ if (fileInfoActual.Exists && fileInfoActual.Length != 0L)
+ {
+ if (!Load(fileNameActual, false))
+ {
+ if (fileInfoBackup.Exists && fileInfoBackup.Length != 0L)
+ {
+ Load(fileNameBackup, true);
+ }
+ }
+ }
+ else if (fileInfoBackup.Exists && fileInfoBackup.Length != 0L)
+ {
+ Load(fileNameBackup, true);
+ }
+ }
+
+ private unsafe bool Load(string fileName, bool isBackup)
+ {
+ using (FileStream compressedStream = new(fileName, FileMode.Open))
+ using (DeflateStream deflateStream = new(compressedStream, CompressionMode.Decompress, true))
+ {
+ OuterHeader outerHeader = DeserializeStructure<OuterHeader>(compressedStream);
+
+ if (!outerHeader.IsHeaderValid())
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ if (outerHeader.Magic != _outerHeaderMagic)
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ if (outerHeader.CacheFileVersion != InternalVersion)
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ if (outerHeader.Endianness != GetEndianness())
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ if (outerHeader.FeatureInfo != GetFeatureInfo())
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ if (outerHeader.MemoryManagerMode != GetMemoryManagerMode())
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ if (outerHeader.OSPlatform != GetOSPlatform())
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ if (outerHeader.Architecture != (uint)RuntimeInformation.ProcessArchitecture)
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ IntPtr intPtr = IntPtr.Zero;
+
+ try
+ {
+ intPtr = Marshal.AllocHGlobal(new IntPtr(outerHeader.UncompressedStreamSize));
+
+ using (UnmanagedMemoryStream stream = new((byte*)intPtr.ToPointer(), outerHeader.UncompressedStreamSize, outerHeader.UncompressedStreamSize, FileAccess.ReadWrite))
+ {
+ try
+ {
+ deflateStream.CopyTo(stream);
+ }
+ catch
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ Debug.Assert(stream.Position == stream.Length);
+
+ stream.Seek(0L, SeekOrigin.Begin);
+
+ InnerHeader innerHeader = DeserializeStructure<InnerHeader>(stream);
+
+ if (!innerHeader.IsHeaderValid())
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ if (innerHeader.Magic != _innerHeaderMagic)
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ ReadOnlySpan<byte> infosBytes = new(stream.PositionPointer, innerHeader.InfosLength);
+ stream.Seek(innerHeader.InfosLength, SeekOrigin.Current);
+
+ Hash128 infosHash = XXHash128.ComputeHash(infosBytes);
+
+ if (innerHeader.InfosHash != infosHash)
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ ReadOnlySpan<byte> codesBytes = (int)innerHeader.CodesLength > 0 ? new(stream.PositionPointer, (int)innerHeader.CodesLength) : ReadOnlySpan<byte>.Empty;
+ stream.Seek(innerHeader.CodesLength, SeekOrigin.Current);
+
+ Hash128 codesHash = XXHash128.ComputeHash(codesBytes);
+
+ if (innerHeader.CodesHash != codesHash)
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ ReadOnlySpan<byte> relocsBytes = new(stream.PositionPointer, innerHeader.RelocsLength);
+ stream.Seek(innerHeader.RelocsLength, SeekOrigin.Current);
+
+ Hash128 relocsHash = XXHash128.ComputeHash(relocsBytes);
+
+ if (innerHeader.RelocsHash != relocsHash)
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ ReadOnlySpan<byte> unwindInfosBytes = new(stream.PositionPointer, innerHeader.UnwindInfosLength);
+ stream.Seek(innerHeader.UnwindInfosLength, SeekOrigin.Current);
+
+ Hash128 unwindInfosHash = XXHash128.ComputeHash(unwindInfosBytes);
+
+ if (innerHeader.UnwindInfosHash != unwindInfosHash)
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ Debug.Assert(stream.Position == stream.Length);
+
+ stream.Seek((long)Unsafe.SizeOf<InnerHeader>(), SeekOrigin.Begin);
+
+ _infosStream.Write(infosBytes);
+ stream.Seek(innerHeader.InfosLength, SeekOrigin.Current);
+
+ _codesList.ReadFrom(stream);
+
+ _relocsStream.Write(relocsBytes);
+ stream.Seek(innerHeader.RelocsLength, SeekOrigin.Current);
+
+ _unwindInfosStream.Write(unwindInfosBytes);
+ stream.Seek(innerHeader.UnwindInfosLength, SeekOrigin.Current);
+
+ Debug.Assert(stream.Position == stream.Length);
+ }
+ }
+ finally
+ {
+ if (intPtr != IntPtr.Zero)
+ {
+ Marshal.FreeHGlobal(intPtr);
+ }
+ }
+ }
+
+ long fileSize = new FileInfo(fileName).Length;
+
+ Logger.Info?.Print(LogClass.Ptc, $"{(isBackup ? "Loaded Backup Translation Cache" : "Loaded Translation Cache")} (size: {fileSize} bytes, translated functions: {GetEntriesCount()}).");
+
+ return true;
+ }
+
+ private void InvalidateCompressedStream(FileStream compressedStream)
+ {
+ compressedStream.SetLength(0L);
+ }
+
+ private void PreSave()
+ {
+ _waitEvent.Reset();
+
+ try
+ {
+ string fileNameActual = $"{CachePathActual}.cache";
+ string fileNameBackup = $"{CachePathBackup}.cache";
+
+ FileInfo fileInfoActual = new FileInfo(fileNameActual);
+
+ if (fileInfoActual.Exists && fileInfoActual.Length != 0L)
+ {
+ File.Copy(fileNameActual, fileNameBackup, true);
+ }
+
+ Save(fileNameActual);
+ }
+ finally
+ {
+ ResetCarriersIfNeeded();
+
+ GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;
+ }
+
+ _waitEvent.Set();
+ }
+
+ private unsafe void Save(string fileName)
+ {
+ int translatedFuncsCount;
+
+ InnerHeader innerHeader = new InnerHeader();
+
+ innerHeader.Magic = _innerHeaderMagic;
+
+ innerHeader.InfosLength = (int)_infosStream.Length;
+ innerHeader.CodesLength = _codesList.Length();
+ innerHeader.RelocsLength = (int)_relocsStream.Length;
+ innerHeader.UnwindInfosLength = (int)_unwindInfosStream.Length;
+
+ OuterHeader outerHeader = new OuterHeader();
+
+ outerHeader.Magic = _outerHeaderMagic;
+
+ outerHeader.CacheFileVersion = InternalVersion;
+ outerHeader.Endianness = GetEndianness();
+ outerHeader.FeatureInfo = GetFeatureInfo();
+ outerHeader.MemoryManagerMode = GetMemoryManagerMode();
+ outerHeader.OSPlatform = GetOSPlatform();
+ outerHeader.Architecture = (uint)RuntimeInformation.ProcessArchitecture;
+
+ outerHeader.UncompressedStreamSize =
+ (long)Unsafe.SizeOf<InnerHeader>() +
+ innerHeader.InfosLength +
+ innerHeader.CodesLength +
+ innerHeader.RelocsLength +
+ innerHeader.UnwindInfosLength;
+
+ outerHeader.SetHeaderHash();
+
+ IntPtr intPtr = IntPtr.Zero;
+
+ try
+ {
+ intPtr = Marshal.AllocHGlobal(new IntPtr(outerHeader.UncompressedStreamSize));
+
+ using (UnmanagedMemoryStream stream = new((byte*)intPtr.ToPointer(), outerHeader.UncompressedStreamSize, outerHeader.UncompressedStreamSize, FileAccess.ReadWrite))
+ {
+ stream.Seek((long)Unsafe.SizeOf<InnerHeader>(), SeekOrigin.Begin);
+
+ ReadOnlySpan<byte> infosBytes = new(stream.PositionPointer, innerHeader.InfosLength);
+ _infosStream.WriteTo(stream);
+
+ ReadOnlySpan<byte> codesBytes = (int)innerHeader.CodesLength > 0 ? new(stream.PositionPointer, (int)innerHeader.CodesLength) : ReadOnlySpan<byte>.Empty;
+ _codesList.WriteTo(stream);
+
+ ReadOnlySpan<byte> relocsBytes = new(stream.PositionPointer, innerHeader.RelocsLength);
+ _relocsStream.WriteTo(stream);
+
+ ReadOnlySpan<byte> unwindInfosBytes = new(stream.PositionPointer, innerHeader.UnwindInfosLength);
+ _unwindInfosStream.WriteTo(stream);
+
+ Debug.Assert(stream.Position == stream.Length);
+
+ innerHeader.InfosHash = XXHash128.ComputeHash(infosBytes);
+ innerHeader.CodesHash = XXHash128.ComputeHash(codesBytes);
+ innerHeader.RelocsHash = XXHash128.ComputeHash(relocsBytes);
+ innerHeader.UnwindInfosHash = XXHash128.ComputeHash(unwindInfosBytes);
+
+ innerHeader.SetHeaderHash();
+
+ stream.Seek(0L, SeekOrigin.Begin);
+ SerializeStructure(stream, innerHeader);
+
+ translatedFuncsCount = GetEntriesCount();
+
+ ResetCarriersIfNeeded();
+
+ using (FileStream compressedStream = new(fileName, FileMode.OpenOrCreate))
+ using (DeflateStream deflateStream = new(compressedStream, SaveCompressionLevel, true))
+ {
+ try
+ {
+ SerializeStructure(compressedStream, outerHeader);
+
+ stream.Seek(0L, SeekOrigin.Begin);
+ stream.CopyTo(deflateStream);
+ }
+ catch
+ {
+ compressedStream.Position = 0L;
+ }
+
+ if (compressedStream.Position < compressedStream.Length)
+ {
+ compressedStream.SetLength(compressedStream.Position);
+ }
+ }
+ }
+ }
+ finally
+ {
+ if (intPtr != IntPtr.Zero)
+ {
+ Marshal.FreeHGlobal(intPtr);
+ }
+ }
+
+ long fileSize = new FileInfo(fileName).Length;
+
+ if (fileSize != 0L)
+ {
+ Logger.Info?.Print(LogClass.Ptc, $"Saved Translation Cache (size: {fileSize} bytes, translated functions: {translatedFuncsCount}).");
+ }
+ }
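+
+ // On-disk cache layout produced by Save (sketch):
+ //
+ // [OuterHeader, uncompressed] [Deflate( [InnerHeader] [infos] [codes] [relocs] [unwindInfos] )]
+ //
+ // Every section is hashed with XXHash128 and the hashes are stored in the inner
+ // header, so Load can reject a truncated or corrupted payload section by section.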
+
+ public void LoadTranslations(Translator translator)
+ {
+ if (AreCarriersEmpty())
+ {
+ return;
+ }
+
+ long infosStreamLength = _infosStream.Length;
+ long relocsStreamLength = _relocsStream.Length;
+ long unwindInfosStreamLength = _unwindInfosStream.Length;
+
+ _infosStream.Seek(0L, SeekOrigin.Begin);
+ _relocsStream.Seek(0L, SeekOrigin.Begin);
+ _unwindInfosStream.Seek(0L, SeekOrigin.Begin);
+
+ using (BinaryReader relocsReader = new(_relocsStream, EncodingCache.UTF8NoBOM, true))
+ using (BinaryReader unwindInfosReader = new(_unwindInfosStream, EncodingCache.UTF8NoBOM, true))
+ {
+ for (int index = 0; index < GetEntriesCount(); index++)
+ {
+ InfoEntry infoEntry = DeserializeStructure<InfoEntry>(_infosStream);
+
+ if (infoEntry.Stubbed)
+ {
+ SkipCode(index, infoEntry.CodeLength);
+ SkipReloc(infoEntry.RelocEntriesCount);
+ SkipUnwindInfo(unwindInfosReader);
+
+ continue;
+ }
+
+ bool isEntryChanged = infoEntry.Hash != ComputeHash(translator.Memory, infoEntry.Address, infoEntry.GuestSize);
+
+ if (isEntryChanged || (!infoEntry.HighCq && Profiler.ProfiledFuncs.TryGetValue(infoEntry.Address, out var value) && value.HighCq))
+ {
+ infoEntry.Stubbed = true;
+ infoEntry.CodeLength = 0;
+ UpdateInfo(infoEntry);
+
+ StubCode(index);
+ StubReloc(infoEntry.RelocEntriesCount);
+ StubUnwindInfo(unwindInfosReader);
+
+ if (isEntryChanged)
+ {
+ Logger.Info?.Print(LogClass.Ptc, $"Invalidated translated function (address: 0x{infoEntry.Address:X16})");
+ }
+
+ continue;
+ }
+
+ byte[] code = ReadCode(index, infoEntry.CodeLength);
+
+ Counter<uint> callCounter = null;
+
+ if (infoEntry.RelocEntriesCount != 0)
+ {
+ RelocEntry[] relocEntries = GetRelocEntries(relocsReader, infoEntry.RelocEntriesCount);
+
+ PatchCode(translator, code, relocEntries, out callCounter);
+ }
+
+ UnwindInfo unwindInfo = ReadUnwindInfo(unwindInfosReader);
+
+ TranslatedFunction func = FastTranslate(code, callCounter, infoEntry.GuestSize, unwindInfo, infoEntry.HighCq);
+
+ translator.RegisterFunction(infoEntry.Address, func);
+
+ bool isAddressUnique = translator.Functions.TryAdd(infoEntry.Address, infoEntry.GuestSize, func);
+
+ Debug.Assert(isAddressUnique, $"The address 0x{infoEntry.Address:X16} is not unique.");
+ }
+ }
+
+ if (_infosStream.Length != infosStreamLength || _infosStream.Position != infosStreamLength ||
+ _relocsStream.Length != relocsStreamLength || _relocsStream.Position != relocsStreamLength ||
+ _unwindInfosStream.Length != unwindInfosStreamLength || _unwindInfosStream.Position != unwindInfosStreamLength)
+ {
+ throw new Exception("The length of a memory stream has changed, or its position has not reached or has exceeded its end.");
+ }
+
+ Logger.Info?.Print(LogClass.Ptc, $"{translator.Functions.Count} translated functions loaded");
+ }
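+
+ // The carriers are consumed in lockstep: entry N pairs the InfoEntry read from
+ // _infosStream with _codesList[N], plus RelocEntriesCount reloc records and one
+ // unwind-info record. The final length/position check catches any drift between
+ // the streams, which would indicate a malformed cache.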
+
+ private int GetEntriesCount()
+ {
+ return _codesList.Count;
+ }
+
+ [Conditional("DEBUG")]
+ private void SkipCode(int index, int codeLength)
+ {
+ Debug.Assert(_codesList[index].Length == 0);
+ Debug.Assert(codeLength == 0);
+ }
+
+ private void SkipReloc(int relocEntriesCount)
+ {
+ _relocsStream.Seek(relocEntriesCount * RelocEntry.Stride, SeekOrigin.Current);
+ }
+
+ private void SkipUnwindInfo(BinaryReader unwindInfosReader)
+ {
+ int pushEntriesLength = unwindInfosReader.ReadInt32();
+
+ _unwindInfosStream.Seek(pushEntriesLength * UnwindPushEntry.Stride + UnwindInfo.Stride, SeekOrigin.Current);
+ }
+
+ private byte[] ReadCode(int index, int codeLength)
+ {
+ Debug.Assert(_codesList[index].Length == codeLength);
+
+ return _codesList[index];
+ }
+
+ private RelocEntry[] GetRelocEntries(BinaryReader relocsReader, int relocEntriesCount)
+ {
+ RelocEntry[] relocEntries = new RelocEntry[relocEntriesCount];
+
+ for (int i = 0; i < relocEntriesCount; i++)
+ {
+ int position = relocsReader.ReadInt32();
+ SymbolType type = (SymbolType)relocsReader.ReadByte();
+ ulong value = relocsReader.ReadUInt64();
+
+ relocEntries[i] = new RelocEntry(position, new Symbol(type, value));
+ }
+
+ return relocEntries;
+ }
+
+ private void PatchCode(Translator translator, Span<byte> code, RelocEntry[] relocEntries, out Counter<uint> callCounter)
+ {
+ callCounter = null;
+
+ foreach (RelocEntry relocEntry in relocEntries)
+ {
+ IntPtr? imm = null;
+ Symbol symbol = relocEntry.Symbol;
+
+ if (symbol.Type == SymbolType.FunctionTable)
+ {
+ ulong guestAddress = symbol.Value;
+
+ if (translator.FunctionTable.IsValid(guestAddress))
+ {
+ unsafe { imm = (IntPtr)Unsafe.AsPointer(ref translator.FunctionTable.GetValue(guestAddress)); }
+ }
+ }
+ else if (symbol.Type == SymbolType.DelegateTable)
+ {
+ int index = (int)symbol.Value;
+
+ if (Delegates.TryGetDelegateFuncPtrByIndex(index, out IntPtr funcPtr))
+ {
+ imm = funcPtr;
+ }
+ }
+ else if (symbol == PageTableSymbol)
+ {
+ imm = translator.Memory.PageTablePointer;
+ }
+ else if (symbol == CountTableSymbol)
+ {
+ if (callCounter == null)
+ {
+ callCounter = new Counter<uint>(translator.CountTable);
+ }
+
+ unsafe { imm = (IntPtr)Unsafe.AsPointer(ref callCounter.Value); }
+ }
+ else if (symbol == DispatchStubSymbol)
+ {
+ imm = translator.Stubs.DispatchStub;
+ }
+
+ if (imm == null)
+ {
+ throw new Exception($"Unexpected reloc entry {relocEntry}.");
+ }
+
+ BinaryPrimitives.WriteUInt64LittleEndian(code.Slice(relocEntry.Position, 8), (ulong)imm.Value);
+ }
+ }
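+
+ // Patching sketch: each RelocEntry names an 8-byte slot in the code blob plus a
+ // symbol, and the resolved host pointer is written little-endian at that offset.
+ // For example (hypothetical values), a DispatchStubSymbol entry with Position
+ // 0x40 stores translator.Stubs.DispatchStub into code[0x40..0x48].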
+
+ private UnwindInfo ReadUnwindInfo(BinaryReader unwindInfosReader)
+ {
+ int pushEntriesLength = unwindInfosReader.ReadInt32();
+
+ UnwindPushEntry[] pushEntries = new UnwindPushEntry[pushEntriesLength];
+
+ for (int i = 0; i < pushEntriesLength; i++)
+ {
+ int pseudoOp = unwindInfosReader.ReadInt32();
+ int prologOffset = unwindInfosReader.ReadInt32();
+ int regIndex = unwindInfosReader.ReadInt32();
+ int stackOffsetOrAllocSize = unwindInfosReader.ReadInt32();
+
+ pushEntries[i] = new UnwindPushEntry((UnwindPseudoOp)pseudoOp, prologOffset, regIndex, stackOffsetOrAllocSize);
+ }
+
+ int prologueSize = unwindInfosReader.ReadInt32();
+
+ return new UnwindInfo(pushEntries, prologueSize);
+ }
+
+ private TranslatedFunction FastTranslate(
+ byte[] code,
+ Counter<uint> callCounter,
+ ulong guestSize,
+ UnwindInfo unwindInfo,
+ bool highCq)
+ {
+ var cFunc = new CompiledFunction(code, unwindInfo, RelocInfo.Empty);
+ var gFunc = cFunc.MapWithPointer<GuestFunction>(out IntPtr gFuncPointer);
+
+ return new TranslatedFunction(gFunc, gFuncPointer, callCounter, guestSize, highCq);
+ }
+
+ private void UpdateInfo(InfoEntry infoEntry)
+ {
+ _infosStream.Seek(-Unsafe.SizeOf<InfoEntry>(), SeekOrigin.Current);
+
+ SerializeStructure(_infosStream, infoEntry);
+ }
+
+ private void StubCode(int index)
+ {
+ _codesList[index] = Array.Empty<byte>();
+ }
+
+ private void StubReloc(int relocEntriesCount)
+ {
+ for (int i = 0; i < relocEntriesCount * RelocEntry.Stride; i++)
+ {
+ _relocsStream.WriteByte(FillingByte);
+ }
+ }
+
+ private void StubUnwindInfo(BinaryReader unwindInfosReader)
+ {
+ int pushEntriesLength = unwindInfosReader.ReadInt32();
+
+ for (int i = 0; i < pushEntriesLength * UnwindPushEntry.Stride + UnwindInfo.Stride; i++)
+ {
+ _unwindInfosStream.WriteByte(FillingByte);
+ }
+ }
+
+ public void MakeAndSaveTranslations(Translator translator)
+ {
+ var profiledFuncsToTranslate = Profiler.GetProfiledFuncsToTranslate(translator.Functions);
+
+ _translateCount = 0;
+ _translateTotalCount = profiledFuncsToTranslate.Count;
+
+ if (_translateTotalCount == 0)
+ {
+ ResetCarriersIfNeeded();
+
+ GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;
+
+ return;
+ }
+
+ int degreeOfParallelism = Environment.ProcessorCount;
+
+ // If there are enough cores lying around, we leave one alone for other tasks.
+ if (degreeOfParallelism > 4)
+ {
+ degreeOfParallelism--;
+ }
+
+ Logger.Info?.Print(LogClass.Ptc, $"{_translateCount} of {_translateTotalCount} functions translated | Thread count: {degreeOfParallelism}");
+
+ PtcStateChanged?.Invoke(PtcLoadingState.Start, _translateCount, _translateTotalCount);
+
+ using AutoResetEvent progressReportEvent = new AutoResetEvent(false);
+
+ Thread progressReportThread = new Thread(ReportProgress)
+ {
+ Name = "Ptc.ProgressReporter",
+ Priority = ThreadPriority.Lowest,
+ IsBackground = true
+ };
+
+ progressReportThread.Start(progressReportEvent);
+
+ void TranslateFuncs()
+ {
+ while (profiledFuncsToTranslate.TryDequeue(out var item))
+ {
+ ulong address = item.address;
+
+ Debug.Assert(Profiler.IsAddressInStaticCodeRange(address));
+
+ TranslatedFunction func = translator.Translate(address, item.funcProfile.Mode, item.funcProfile.HighCq);
+
+ bool isAddressUnique = translator.Functions.TryAdd(address, func.GuestSize, func);
+
+ Debug.Assert(isAddressUnique, $"The address 0x{address:X16} is not unique.");
+
+ Interlocked.Increment(ref _translateCount);
+
+ translator.RegisterFunction(address, func);
+
+ if (State != PtcState.Enabled)
+ {
+ break;
+ }
+ }
+ }
+
+ List<Thread> threads = new List<Thread>();
+
+ for (int i = 0; i < degreeOfParallelism; i++)
+ {
+ Thread thread = new Thread(TranslateFuncs);
+ thread.IsBackground = true;
+
+ threads.Add(thread);
+ }
+
+ Stopwatch sw = Stopwatch.StartNew();
+
+ threads.ForEach((thread) => thread.Start());
+ threads.ForEach((thread) => thread.Join());
+
+ threads.Clear();
+
+ progressReportEvent.Set();
+ progressReportThread.Join();
+
+ sw.Stop();
+
+ PtcStateChanged?.Invoke(PtcLoadingState.Loaded, _translateCount, _translateTotalCount);
+
+ Logger.Info?.Print(LogClass.Ptc, $"{_translateCount} of {_translateTotalCount} functions translated | Thread count: {degreeOfParallelism} in {sw.Elapsed.TotalSeconds} s");
+
+ Thread preSaveThread = new Thread(PreSave);
+ preSaveThread.IsBackground = true;
+ preSaveThread.Start();
+ }
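+
+ // Threading sketch: the worker threads all drain one shared ConcurrentQueue, so
+ // work distribution is dynamic, while the low-priority reporter thread wakes
+ // every 50 ms and raises PtcStateChanged only when _translateCount has advanced.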
+
+ private void ReportProgress(object state)
+ {
+ const int refreshRate = 50; // ms.
+
+ AutoResetEvent endEvent = (AutoResetEvent)state;
+
+ int count = 0;
+
+ do
+ {
+ int newCount = _translateCount;
+
+ if (count != newCount)
+ {
+ PtcStateChanged?.Invoke(PtcLoadingState.Loading, newCount, _translateTotalCount);
+ count = newCount;
+ }
+ }
+ while (!endEvent.WaitOne(refreshRate));
+ }
+
+ public static Hash128 ComputeHash(IMemoryManager memory, ulong address, ulong guestSize)
+ {
+ return XXHash128.ComputeHash(memory.GetSpan(address, checked((int)(guestSize))));
+ }
+
+ public void WriteCompiledFunction(ulong address, ulong guestSize, Hash128 hash, bool highCq, CompiledFunction compiledFunc)
+ {
+ lock (_lock)
+ {
+ byte[] code = compiledFunc.Code;
+ RelocInfo relocInfo = compiledFunc.RelocInfo;
+ UnwindInfo unwindInfo = compiledFunc.UnwindInfo;
+
+ InfoEntry infoEntry = new InfoEntry();
+
+ infoEntry.Address = address;
+ infoEntry.GuestSize = guestSize;
+ infoEntry.Hash = hash;
+ infoEntry.HighCq = highCq;
+ infoEntry.Stubbed = false;
+ infoEntry.CodeLength = code.Length;
+ infoEntry.RelocEntriesCount = relocInfo.Entries.Length;
+
+ SerializeStructure(_infosStream, infoEntry);
+
+ WriteCode(code.AsSpan());
+
+ // WriteReloc.
+ using var relocInfoWriter = new BinaryWriter(_relocsStream, EncodingCache.UTF8NoBOM, true);
+
+ foreach (RelocEntry entry in relocInfo.Entries)
+ {
+ relocInfoWriter.Write(entry.Position);
+ relocInfoWriter.Write((byte)entry.Symbol.Type);
+ relocInfoWriter.Write(entry.Symbol.Value);
+ }
+
+ // WriteUnwindInfo.
+ using var unwindInfoWriter = new BinaryWriter(_unwindInfosStream, EncodingCache.UTF8NoBOM, true);
+
+ unwindInfoWriter.Write(unwindInfo.PushEntries.Length);
+
+ foreach (UnwindPushEntry unwindPushEntry in unwindInfo.PushEntries)
+ {
+ unwindInfoWriter.Write((int)unwindPushEntry.PseudoOp);
+ unwindInfoWriter.Write(unwindPushEntry.PrologOffset);
+ unwindInfoWriter.Write(unwindPushEntry.RegIndex);
+ unwindInfoWriter.Write(unwindPushEntry.StackOffsetOrAllocSize);
+ }
+
+ unwindInfoWriter.Write(unwindInfo.PrologSize);
+ }
+ }
+
+ private void WriteCode(ReadOnlySpan<byte> code)
+ {
+ _codesList.Add(code.ToArray());
+ }
+
+ public static bool GetEndianness()
+ {
+ return BitConverter.IsLittleEndian;
+ }
+
+ private static FeatureInfo GetFeatureInfo()
+ {
+ if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+ {
+ return new FeatureInfo(
+ (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap,
+ (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2,
+ (ulong)Arm64HardwareCapabilities.MacOsFeatureInfo,
+ 0,
+ 0);
+ }
+ else if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
+ {
+ return new FeatureInfo(
+ (ulong)X86HardwareCapabilities.FeatureInfo1Ecx,
+ (ulong)X86HardwareCapabilities.FeatureInfo1Edx,
+ (ulong)X86HardwareCapabilities.FeatureInfo7Ebx,
+ (ulong)X86HardwareCapabilities.FeatureInfo7Ecx,
+ (ulong)X86HardwareCapabilities.Xcr0InfoEax);
+ }
+ else
+ {
+ return new FeatureInfo(0, 0, 0, 0, 0);
+ }
+ }
+
+ private byte GetMemoryManagerMode()
+ {
+ return (byte)_memoryMode;
+ }
+
+ private static uint GetOSPlatform()
+ {
+ uint osPlatform = 0u;
+
+ osPlatform |= (OperatingSystem.IsFreeBSD() ? 1u : 0u) << 0;
+ osPlatform |= (OperatingSystem.IsLinux() ? 1u : 0u) << 1;
+ osPlatform |= (OperatingSystem.IsMacOS() ? 1u : 0u) << 2;
+ osPlatform |= (OperatingSystem.IsWindows() ? 1u : 0u) << 3;
+
+ return osPlatform;
+ }
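+
+ // Bit layout of the resulting word: bit 0 = FreeBSD, bit 1 = Linux,
+ // bit 2 = macOS, bit 3 = Windows; e.g. a cache written on Linux stores 0b0010.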
+
+ [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 86*/)]
+ private struct OuterHeader
+ {
+ public ulong Magic;
+
+ public uint CacheFileVersion;
+
+ public bool Endianness;
+ public FeatureInfo FeatureInfo;
+ public byte MemoryManagerMode;
+ public uint OSPlatform;
+ public uint Architecture;
+
+ public long UncompressedStreamSize;
+
+ public Hash128 HeaderHash;
+
+ public void SetHeaderHash()
+ {
+ Span<OuterHeader> spanHeader = MemoryMarshal.CreateSpan(ref this, 1);
+
+ HeaderHash = XXHash128.ComputeHash(MemoryMarshal.AsBytes(spanHeader).Slice(0, Unsafe.SizeOf<OuterHeader>() - Unsafe.SizeOf<Hash128>()));
+ }
+
+ public bool IsHeaderValid()
+ {
+ Span<OuterHeader> spanHeader = MemoryMarshal.CreateSpan(ref this, 1);
+
+ return XXHash128.ComputeHash(MemoryMarshal.AsBytes(spanHeader).Slice(0, Unsafe.SizeOf<OuterHeader>() - Unsafe.SizeOf<Hash128>())) == HeaderHash;
+ }
+ }
+
+ [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 40*/)]
+ private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3, ulong FeatureInfo4);
+
+ [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
+ private struct InnerHeader
+ {
+ public ulong Magic;
+
+ public int InfosLength;
+ public long CodesLength;
+ public int RelocsLength;
+ public int UnwindInfosLength;
+
+ public Hash128 InfosHash;
+ public Hash128 CodesHash;
+ public Hash128 RelocsHash;
+ public Hash128 UnwindInfosHash;
+
+ public Hash128 HeaderHash;
+
+ public void SetHeaderHash()
+ {
+ Span<InnerHeader> spanHeader = MemoryMarshal.CreateSpan(ref this, 1);
+
+ HeaderHash = XXHash128.ComputeHash(MemoryMarshal.AsBytes(spanHeader).Slice(0, Unsafe.SizeOf<InnerHeader>() - Unsafe.SizeOf<Hash128>()));
+ }
+
+ public bool IsHeaderValid()
+ {
+ Span<InnerHeader> spanHeader = MemoryMarshal.CreateSpan(ref this, 1);
+
+ return XXHash128.ComputeHash(MemoryMarshal.AsBytes(spanHeader).Slice(0, Unsafe.SizeOf<InnerHeader>() - Unsafe.SizeOf<Hash128>())) == HeaderHash;
+ }
+ }
+
+ [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 42*/)]
+ private struct InfoEntry
+ {
+ public ulong Address;
+ public ulong GuestSize;
+ public Hash128 Hash;
+ public bool HighCq;
+ public bool Stubbed;
+ public int CodeLength;
+ public int RelocEntriesCount;
+ }
+
+ private void Enable()
+ {
+ State = PtcState.Enabled;
+ }
+
+ public void Continue()
+ {
+ if (State == PtcState.Enabled)
+ {
+ State = PtcState.Continuing;
+ }
+ }
+
+ public void Close()
+ {
+ if (State == PtcState.Enabled ||
+ State == PtcState.Continuing)
+ {
+ State = PtcState.Closing;
+ }
+ }
+
+ public void Disable()
+ {
+ State = PtcState.Disabled;
+ }
+
+ private void Wait()
+ {
+ _waitEvent.WaitOne();
+ }
+
+ public void Dispose()
+ {
+ if (!_disposed)
+ {
+ _disposed = true;
+
+ Wait();
+ _waitEvent.Dispose();
+
+ DisposeCarriers();
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Translation/PTC/PtcFormatter.cs b/src/ARMeilleure/Translation/PTC/PtcFormatter.cs
new file mode 100644
index 00000000..2f7a9c21
--- /dev/null
+++ b/src/ARMeilleure/Translation/PTC/PtcFormatter.cs
@@ -0,0 +1,179 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation.PTC
+{
+ static class PtcFormatter
+ {
+ #region "Deserialize"
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Dictionary<TKey, TValue> DeserializeDictionary<TKey, TValue>(Stream stream, Func<Stream, TValue> valueFunc) where TKey : struct
+ {
+ Dictionary<TKey, TValue> dictionary = new();
+
+ int count = DeserializeStructure<int>(stream);
+
+ for (int i = 0; i < count; i++)
+ {
+ TKey key = DeserializeStructure<TKey>(stream);
+ TValue value = valueFunc(stream);
+
+ dictionary.Add(key, value);
+ }
+
+ return dictionary;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static List<T> DeserializeList<T>(Stream stream) where T : struct
+ {
+ List<T> list = new();
+
+ int count = DeserializeStructure<int>(stream);
+
+ for (int i = 0; i < count; i++)
+ {
+ T item = DeserializeStructure<T>(stream);
+
+ list.Add(item);
+ }
+
+ return list;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static T DeserializeStructure<T>(Stream stream) where T : struct
+ {
+ T structure = default(T);
+
+ Span<T> spanT = MemoryMarshal.CreateSpan(ref structure, 1);
+ int bytesCount = stream.Read(MemoryMarshal.AsBytes(spanT));
+
+ if (bytesCount != Unsafe.SizeOf<T>())
+ {
+ throw new EndOfStreamException();
+ }
+
+ return structure;
+ }
+ #endregion
+
+ #region "GetSerializeSize"
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int GetSerializeSizeDictionary<TKey, TValue>(Dictionary<TKey, TValue> dictionary, Func<TValue, int> valueFunc) where TKey : struct
+ {
+ int size = 0;
+
+ size += Unsafe.SizeOf<int>();
+
+ foreach ((_, TValue value) in dictionary)
+ {
+ size += Unsafe.SizeOf<TKey>();
+ size += valueFunc(value);
+ }
+
+ return size;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int GetSerializeSizeList<T>(List<T> list) where T : struct
+ {
+ int size = 0;
+
+ size += Unsafe.SizeOf<int>();
+
+ size += list.Count * Unsafe.SizeOf<T>();
+
+ return size;
+ }
+ #endregion
+
+ #region "Serialize"
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void SerializeDictionary<TKey, TValue>(Stream stream, Dictionary<TKey, TValue> dictionary, Action<Stream, TValue> valueAction) where TKey : struct
+ {
+ SerializeStructure<int>(stream, dictionary.Count);
+
+ foreach ((TKey key, TValue value) in dictionary)
+ {
+ SerializeStructure<TKey>(stream, key);
+ valueAction(stream, value);
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void SerializeList<T>(Stream stream, List<T> list) where T : struct
+ {
+ SerializeStructure<int>(stream, list.Count);
+
+ foreach (T item in list)
+ {
+ SerializeStructure<T>(stream, item);
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void SerializeStructure<T>(Stream stream, T structure) where T : struct
+ {
+ Span<T> spanT = MemoryMarshal.CreateSpan(ref structure, 1);
+ stream.Write(MemoryMarshal.AsBytes(spanT));
+ }
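+
+ // Round-trip sketch: structures move as raw blittable bytes, so each call
+ // transfers exactly Unsafe.SizeOf<T>() bytes:
+ //
+ // using MemoryStream ms = new();
+ // SerializeStructure(ms, 42); // writes 4 bytes
+ // ms.Position = 0;
+ // int x = DeserializeStructure<int>(ms); // x == 42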
+ #endregion
+
+ #region "Extension methods"
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void ReadFrom<T>(this List<T[]> list, Stream stream) where T : struct
+ {
+ int count = DeserializeStructure<int>(stream);
+
+ for (int i = 0; i < count; i++)
+ {
+ int itemLength = DeserializeStructure<int>(stream);
+
+ T[] item = new T[itemLength];
+
+ // Stream.Read returns a count of bytes, so compare against the item's byte size.
+ int bytesCount = stream.Read(MemoryMarshal.AsBytes(item.AsSpan()));
+
+ if (bytesCount != itemLength * Unsafe.SizeOf<T>())
+ {
+ throw new EndOfStreamException();
+ }
+
+ list.Add(item);
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long Length<T>(this List<T[]> list) where T : struct
+ {
+ long size = 0L;
+
+ size += Unsafe.SizeOf<int>();
+
+ foreach (T[] item in list)
+ {
+ size += Unsafe.SizeOf<int>();
+ size += item.Length * Unsafe.SizeOf<T>(); // Bytes written by WriteTo, not just the element count.
+ }
+
+ return size;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void WriteTo<T>(this List<T[]> list, Stream stream) where T : struct
+ {
+ SerializeStructure<int>(stream, list.Count);
+
+ foreach (T[] item in list)
+ {
+ SerializeStructure<int>(stream, item.Length);
+
+ stream.Write(MemoryMarshal.AsBytes(item.AsSpan()));
+ }
+ }
+ #endregion
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/PTC/PtcLoadingState.cs b/src/ARMeilleure/Translation/PTC/PtcLoadingState.cs
new file mode 100644
index 00000000..526cf91f
--- /dev/null
+++ b/src/ARMeilleure/Translation/PTC/PtcLoadingState.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Translation.PTC
+{
+ public enum PtcLoadingState
+ {
+ Start,
+ Loading,
+ Loaded
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/PTC/PtcProfiler.cs b/src/ARMeilleure/Translation/PTC/PtcProfiler.cs
new file mode 100644
index 00000000..391e29c7
--- /dev/null
+++ b/src/ARMeilleure/Translation/PTC/PtcProfiler.cs
@@ -0,0 +1,421 @@
+using ARMeilleure.State;
+using Ryujinx.Common;
+using Ryujinx.Common.Logging;
+using Ryujinx.Common.Memory;
+using System;
+using System.Buffers.Binary;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.IO.Compression;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Threading;
+
+using static ARMeilleure.Translation.PTC.PtcFormatter;
+
+namespace ARMeilleure.Translation.PTC
+{
+ class PtcProfiler
+ {
+ private const string OuterHeaderMagicString = "Pohd\0\0\0\0";
+
+ private const uint InternalVersion = 1866; //! Not to be incremented manually for each change to the ARMeilleure project.
+
+ private const int SaveInterval = 30; // Seconds.
+
+ private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest;
+
+ private readonly Ptc _ptc;
+
+ private readonly System.Timers.Timer _timer;
+
+ private readonly ulong _outerHeaderMagic;
+
+ private readonly ManualResetEvent _waitEvent;
+
+ private readonly object _lock;
+
+ private bool _disposed;
+
+ private Hash128 _lastHash;
+
+ public Dictionary<ulong, FuncProfile> ProfiledFuncs { get; private set; }
+
+ public bool Enabled { get; private set; }
+
+ public ulong StaticCodeStart { get; set; }
+ public ulong StaticCodeSize { get; set; }
+
+ public PtcProfiler(Ptc ptc)
+ {
+ _ptc = ptc;
+
+ _timer = new System.Timers.Timer((double)SaveInterval * 1000d);
+ _timer.Elapsed += PreSave;
+
+ _outerHeaderMagic = BinaryPrimitives.ReadUInt64LittleEndian(EncodingCache.UTF8NoBOM.GetBytes(OuterHeaderMagicString).AsSpan());
+
+ _waitEvent = new ManualResetEvent(true);
+
+ _lock = new object();
+
+ _disposed = false;
+
+ ProfiledFuncs = new Dictionary<ulong, FuncProfile>();
+
+ Enabled = false;
+ }
+
+ public void AddEntry(ulong address, ExecutionMode mode, bool highCq)
+ {
+ if (IsAddressInStaticCodeRange(address))
+ {
+ Debug.Assert(!highCq);
+
+ lock (_lock)
+ {
+ ProfiledFuncs.TryAdd(address, new FuncProfile(mode, highCq: false));
+ }
+ }
+ }
+
+ public void UpdateEntry(ulong address, ExecutionMode mode, bool highCq)
+ {
+ if (IsAddressInStaticCodeRange(address))
+ {
+ Debug.Assert(highCq);
+
+ lock (_lock)
+ {
+ Debug.Assert(ProfiledFuncs.ContainsKey(address));
+
+ ProfiledFuncs[address] = new FuncProfile(mode, highCq: true);
+ }
+ }
+ }
+
+ public bool IsAddressInStaticCodeRange(ulong address)
+ {
+ return address >= StaticCodeStart && address < StaticCodeStart + StaticCodeSize;
+ }
+
+ public ConcurrentQueue<(ulong address, FuncProfile funcProfile)> GetProfiledFuncsToTranslate(TranslatorCache<TranslatedFunction> funcs)
+ {
+ var profiledFuncsToTranslate = new ConcurrentQueue<(ulong address, FuncProfile funcProfile)>();
+
+ foreach (var profiledFunc in ProfiledFuncs)
+ {
+ if (!funcs.ContainsKey(profiledFunc.Key))
+ {
+ profiledFuncsToTranslate.Enqueue((profiledFunc.Key, profiledFunc.Value));
+ }
+ }
+
+ return profiledFuncsToTranslate;
+ }
+
+ public void ClearEntries()
+ {
+ ProfiledFuncs.Clear();
+ ProfiledFuncs.TrimExcess();
+ }
+
+ public void PreLoad()
+ {
+ _lastHash = default;
+
+ string fileNameActual = $"{_ptc.CachePathActual}.info";
+ string fileNameBackup = $"{_ptc.CachePathBackup}.info";
+
+ FileInfo fileInfoActual = new FileInfo(fileNameActual);
+ FileInfo fileInfoBackup = new FileInfo(fileNameBackup);
+
+ if (fileInfoActual.Exists && fileInfoActual.Length != 0L)
+ {
+ if (!Load(fileNameActual, false))
+ {
+ if (fileInfoBackup.Exists && fileInfoBackup.Length != 0L)
+ {
+ Load(fileNameBackup, true);
+ }
+ }
+ }
+ else if (fileInfoBackup.Exists && fileInfoBackup.Length != 0L)
+ {
+ Load(fileNameBackup, true);
+ }
+ }
+
+ private bool Load(string fileName, bool isBackup)
+ {
+ using (FileStream compressedStream = new(fileName, FileMode.Open))
+ using (DeflateStream deflateStream = new(compressedStream, CompressionMode.Decompress, true))
+ {
+ OuterHeader outerHeader = DeserializeStructure<OuterHeader>(compressedStream);
+
+ if (!outerHeader.IsHeaderValid())
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ if (outerHeader.Magic != _outerHeaderMagic)
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ if (outerHeader.InfoFileVersion != InternalVersion)
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ if (outerHeader.Endianness != Ptc.GetEndianness())
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ using (MemoryStream stream = MemoryStreamManager.Shared.GetStream())
+ {
+ Debug.Assert(stream.Seek(0L, SeekOrigin.Begin) == 0L && stream.Length == 0L);
+
+ try
+ {
+ deflateStream.CopyTo(stream);
+ }
+ catch
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ Debug.Assert(stream.Position == stream.Length);
+
+ stream.Seek(0L, SeekOrigin.Begin);
+
+ Hash128 expectedHash = DeserializeStructure<Hash128>(stream);
+
+ Hash128 actualHash = XXHash128.ComputeHash(GetReadOnlySpan(stream));
+
+ if (actualHash != expectedHash)
+ {
+ InvalidateCompressedStream(compressedStream);
+
+ return false;
+ }
+
+ ProfiledFuncs = Deserialize(stream);
+
+ Debug.Assert(stream.Position == stream.Length);
+
+ _lastHash = actualHash;
+ }
+ }
+
+ long fileSize = new FileInfo(fileName).Length;
+
+ Logger.Info?.Print(LogClass.Ptc, $"{(isBackup ? "Loaded Backup Profiling Info" : "Loaded Profiling Info")} (size: {fileSize} bytes, profiled functions: {ProfiledFuncs.Count}).");
+
+ return true;
+ }
+
+ private static Dictionary<ulong, FuncProfile> Deserialize(Stream stream)
+ {
+ return DeserializeDictionary<ulong, FuncProfile>(stream, (stream) => DeserializeStructure<FuncProfile>(stream));
+ }
+
+ private ReadOnlySpan<byte> GetReadOnlySpan(MemoryStream memoryStream)
+ {
+ return new(memoryStream.GetBuffer(), (int)memoryStream.Position, (int)memoryStream.Length - (int)memoryStream.Position);
+ }
+
+ private void InvalidateCompressedStream(FileStream compressedStream)
+ {
+ compressedStream.SetLength(0L);
+ }
+
+ private void PreSave(object source, System.Timers.ElapsedEventArgs e)
+ {
+ _waitEvent.Reset();
+
+ string fileNameActual = $"{_ptc.CachePathActual}.info";
+ string fileNameBackup = $"{_ptc.CachePathBackup}.info";
+
+ FileInfo fileInfoActual = new FileInfo(fileNameActual);
+
+ if (fileInfoActual.Exists && fileInfoActual.Length != 0L)
+ {
+ File.Copy(fileNameActual, fileNameBackup, true);
+ }
+
+ Save(fileNameActual);
+
+ _waitEvent.Set();
+ }
+
+ private void Save(string fileName)
+ {
+ int profiledFuncsCount;
+
+ OuterHeader outerHeader = new OuterHeader();
+
+ outerHeader.Magic = _outerHeaderMagic;
+
+ outerHeader.InfoFileVersion = InternalVersion;
+ outerHeader.Endianness = Ptc.GetEndianness();
+
+ outerHeader.SetHeaderHash();
+
+ using (MemoryStream stream = MemoryStreamManager.Shared.GetStream())
+ {
+ Debug.Assert(stream.Seek(0L, SeekOrigin.Begin) == 0L && stream.Length == 0L);
+
+ stream.Seek((long)Unsafe.SizeOf<Hash128>(), SeekOrigin.Begin);
+
+ lock (_lock)
+ {
+ Serialize(stream, ProfiledFuncs);
+
+ profiledFuncsCount = ProfiledFuncs.Count;
+ }
+
+ Debug.Assert(stream.Position == stream.Length);
+
+ stream.Seek((long)Unsafe.SizeOf<Hash128>(), SeekOrigin.Begin);
+ Hash128 hash = XXHash128.ComputeHash(GetReadOnlySpan(stream));
+
+ stream.Seek(0L, SeekOrigin.Begin);
+ SerializeStructure(stream, hash);
+
+ if (hash == _lastHash)
+ {
+ return;
+ }
+
+ using (FileStream compressedStream = new(fileName, FileMode.OpenOrCreate))
+ using (DeflateStream deflateStream = new(compressedStream, SaveCompressionLevel, true))
+ {
+ try
+ {
+ SerializeStructure(compressedStream, outerHeader);
+
+ stream.WriteTo(deflateStream);
+
+ _lastHash = hash;
+ }
+ catch
+ {
+ compressedStream.Position = 0L;
+
+ _lastHash = default;
+ }
+
+ if (compressedStream.Position < compressedStream.Length)
+ {
+ compressedStream.SetLength(compressedStream.Position);
+ }
+ }
+ }
+
+ long fileSize = new FileInfo(fileName).Length;
+
+ if (fileSize != 0L)
+ {
+ Logger.Info?.Print(LogClass.Ptc, $"Saved Profiling Info (size: {fileSize} bytes, profiled functions: {profiledFuncsCount}).");
+ }
+ }
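+
+ // Profiling info layout produced by Save (sketch):
+ //
+ // [OuterHeader, uncompressed] [Deflate( [Hash128 of payload] [ProfiledFuncs dictionary] )]
+ //
+ // The payload hash doubles as a dirty check: when it equals _lastHash nothing
+ // has changed since the previous save, so the file is left untouched.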
+
+ private void Serialize(Stream stream, Dictionary<ulong, FuncProfile> profiledFuncs)
+ {
+ SerializeDictionary(stream, profiledFuncs, (stream, structure) => SerializeStructure(stream, structure));
+ }
+
+ [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 29*/)]
+ private struct OuterHeader
+ {
+ public ulong Magic;
+
+ public uint InfoFileVersion;
+
+ public bool Endianness;
+
+ public Hash128 HeaderHash;
+
+ public void SetHeaderHash()
+ {
+ Span<OuterHeader> spanHeader = MemoryMarshal.CreateSpan(ref this, 1);
+
+ HeaderHash = XXHash128.ComputeHash(MemoryMarshal.AsBytes(spanHeader).Slice(0, Unsafe.SizeOf<OuterHeader>() - Unsafe.SizeOf<Hash128>()));
+ }
+
+ public bool IsHeaderValid()
+ {
+ Span<OuterHeader> spanHeader = MemoryMarshal.CreateSpan(ref this, 1);
+
+ return XXHash128.ComputeHash(MemoryMarshal.AsBytes(spanHeader).Slice(0, Unsafe.SizeOf<OuterHeader>() - Unsafe.SizeOf<Hash128>())) == HeaderHash;
+ }
+ }
+
+ [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 5*/)]
+ public struct FuncProfile
+ {
+ public ExecutionMode Mode;
+ public bool HighCq;
+
+ public FuncProfile(ExecutionMode mode, bool highCq)
+ {
+ Mode = mode;
+ HighCq = highCq;
+ }
+ }
+
+ public void Start()
+ {
+ if (_ptc.State == PtcState.Enabled ||
+ _ptc.State == PtcState.Continuing)
+ {
+ Enabled = true;
+
+ _timer.Enabled = true;
+ }
+ }
+
+ public void Stop()
+ {
+ Enabled = false;
+
+ if (!_disposed)
+ {
+ _timer.Enabled = false;
+ }
+ }
+
+ public void Wait()
+ {
+ _waitEvent.WaitOne();
+ }
+
+ public void Dispose()
+ {
+ if (!_disposed)
+ {
+ _disposed = true;
+
+ _timer.Elapsed -= PreSave;
+ _timer.Dispose();
+
+ Wait();
+ _waitEvent.Dispose();
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/PTC/PtcState.cs b/src/ARMeilleure/Translation/PTC/PtcState.cs
new file mode 100644
index 00000000..ca4f4108
--- /dev/null
+++ b/src/ARMeilleure/Translation/PTC/PtcState.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Translation.PTC
+{
+ enum PtcState
+ {
+ Enabled,
+ Continuing,
+ Closing,
+ Disabled
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/RegisterToLocal.cs b/src/ARMeilleure/Translation/RegisterToLocal.cs
new file mode 100644
index 00000000..abb9b373
--- /dev/null
+++ b/src/ARMeilleure/Translation/RegisterToLocal.cs
@@ -0,0 +1,52 @@
+using ARMeilleure.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Translation
+{
+ static class RegisterToLocal
+ {
+ public static void Rename(ControlFlowGraph cfg)
+ {
+ Dictionary<Register, Operand> registerToLocalMap = new Dictionary<Register, Operand>();
+
+ Operand GetLocal(Operand op)
+ {
+ Register register = op.GetRegister();
+
+ if (!registerToLocalMap.TryGetValue(register, out Operand local))
+ {
+ local = Local(op.Type);
+
+ registerToLocalMap.Add(register, local);
+ }
+
+ return local;
+ }
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ Operand dest = node.Destination;
+
+ if (dest != default && dest.Kind == OperandKind.Register)
+ {
+ node.Destination = GetLocal(dest);
+ }
+
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ Operand source = node.GetSource(index);
+
+ if (source.Kind == OperandKind.Register)
+ {
+ node.SetSource(index, GetLocal(source));
+ }
+ }
+ }
+ }
+ }
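+
+ // Effect sketch: after Rename, no register operands remain in the IR. Each
+ // distinct guest register (say X0, or vector register V3) maps to one fresh
+ // local operand reused across all blocks, so later passes deal only in locals.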
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/RegisterUsage.cs b/src/ARMeilleure/Translation/RegisterUsage.cs
new file mode 100644
index 00000000..3ec0a7b4
--- /dev/null
+++ b/src/ARMeilleure/Translation/RegisterUsage.cs
@@ -0,0 +1,394 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using System;
+using System.Numerics;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.Translation
+{
+ static class RegisterUsage
+ {
+ private const int RegsCount = 32;
+ private const int RegsMask = RegsCount - 1;
+
+ private readonly struct RegisterMask : IEquatable<RegisterMask>
+ {
+ public long IntMask => Mask.GetElement(0);
+ public long VecMask => Mask.GetElement(1);
+
+ public Vector128<long> Mask { get; }
+
+ public RegisterMask(Vector128<long> mask)
+ {
+ Mask = mask;
+ }
+
+ public RegisterMask(long intMask, long vecMask)
+ {
+ Mask = Vector128.Create(intMask, vecMask);
+ }
+
+ public static RegisterMask operator &(RegisterMask x, RegisterMask y)
+ {
+ if (Sse2.IsSupported)
+ {
+ return new RegisterMask(Sse2.And(x.Mask, y.Mask));
+ }
+
+ return new RegisterMask(x.IntMask & y.IntMask, x.VecMask & y.VecMask);
+ }
+
+ public static RegisterMask operator |(RegisterMask x, RegisterMask y)
+ {
+ if (Sse2.IsSupported)
+ {
+ return new RegisterMask(Sse2.Or(x.Mask, y.Mask));
+ }
+
+ return new RegisterMask(x.IntMask | y.IntMask, x.VecMask | y.VecMask);
+ }
+
+ public static RegisterMask operator ~(RegisterMask x)
+ {
+ if (Sse2.IsSupported)
+ {
+ return new RegisterMask(Sse2.AndNot(x.Mask, Vector128<long>.AllBitsSet));
+ }
+
+ return new RegisterMask(~x.IntMask, ~x.VecMask);
+ }
+
+ public static bool operator ==(RegisterMask x, RegisterMask y)
+ {
+ return x.Equals(y);
+ }
+
+ public static bool operator !=(RegisterMask x, RegisterMask y)
+ {
+ return !x.Equals(y);
+ }
+
+ public override bool Equals(object obj)
+ {
+ return obj is RegisterMask regMask && Equals(regMask);
+ }
+
+ public bool Equals(RegisterMask other)
+ {
+ return Mask.Equals(other.Mask);
+ }
+
+ public override int GetHashCode()
+ {
+ return Mask.GetHashCode();
+ }
+ }
+
+ public static void RunPass(ControlFlowGraph cfg, ExecutionMode mode)
+ {
+ // Compute local register inputs and outputs used inside blocks.
+ RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Count];
+ RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Count];
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ Operand source = node.GetSource(index);
+
+ if (source.Kind == OperandKind.Register)
+ {
+ Register register = source.GetRegister();
+
+ localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index];
+ }
+ }
+
+ if (node.Destination != default && node.Destination.Kind == OperandKind.Register)
+ {
+ localOutputs[block.Index] |= GetMask(node.Destination.GetRegister());
+ }
+ }
+ }
+
+ // Compute global register inputs and outputs used across blocks.
+ RegisterMask[] globalCmnOutputs = new RegisterMask[cfg.Blocks.Count];
+
+ RegisterMask[] globalInputs = new RegisterMask[cfg.Blocks.Count];
+ RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Count];
+
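+            // Iterate to a fixed point: a register becomes a global input of a block when some
+            // predecessor path defines it and another does not (outputs & ~cmnOutputs), and inputs
+            // propagate backwards from successors until no mask changes.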
+ bool modified;
+ bool firstPass = true;
+
+ do
+ {
+ modified = false;
+
+ // Compute register outputs.
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ if (block.Predecessors.Count != 0 && !HasContextLoad(block))
+ {
+ BasicBlock predecessor = block.Predecessors[0];
+
+ RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
+ RegisterMask outputs = globalOutputs[predecessor.Index];
+
+ for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++)
+ {
+ predecessor = block.Predecessors[pIndex];
+
+ cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
+ outputs |= globalOutputs[predecessor.Index];
+ }
+
+ globalInputs[block.Index] |= outputs & ~cmnOutputs;
+
+ if (!firstPass)
+ {
+ cmnOutputs &= globalCmnOutputs[block.Index];
+ }
+
+ modified |= Exchange(globalCmnOutputs, block.Index, cmnOutputs);
+ outputs |= localOutputs[block.Index];
+ modified |= Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs);
+ }
+ else
+ {
+ modified |= Exchange(globalOutputs, block.Index, localOutputs[block.Index]);
+ }
+ }
+
+ // Compute register inputs.
+ for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ RegisterMask inputs = localInputs[block.Index];
+
+ for (int i = 0; i < block.SuccessorsCount; i++)
+ {
+ inputs |= globalInputs[block.GetSuccessor(i).Index];
+ }
+
+ inputs &= ~globalCmnOutputs[block.Index];
+
+ modified |= Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs);
+ }
+
+ firstPass = false;
+ }
+ while (modified);
+
+ // Insert load and store context instructions where needed.
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ bool hasContextLoad = HasContextLoad(block);
+
+ if (hasContextLoad)
+ {
+ block.Operations.Remove(block.Operations.First);
+ }
+
+ Operand arg = default;
+
+ // The only block without any predecessor should be the entry block.
+ // It always needs a context load as it is the first block to run.
+ if (block.Predecessors.Count == 0 || hasContextLoad)
+ {
+ long vecMask = globalInputs[block.Index].VecMask;
+ long intMask = globalInputs[block.Index].IntMask;
+
+ if (vecMask != 0 || intMask != 0)
+ {
+ arg = Local(OperandType.I64);
+
+ Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0)));
+
+ LoadLocals(block, vecMask, RegisterType.Vector, mode, loadArg, arg);
+ LoadLocals(block, intMask, RegisterType.Integer, mode, loadArg, arg);
+ }
+ }
+
+ bool hasContextStore = HasContextStore(block);
+
+ if (hasContextStore)
+ {
+ block.Operations.Remove(block.Operations.Last);
+ }
+
+ if (EndsWithReturn(block) || hasContextStore)
+ {
+ long vecMask = globalOutputs[block.Index].VecMask;
+ long intMask = globalOutputs[block.Index].IntMask;
+
+ if (vecMask != 0 || intMask != 0)
+ {
+ if (arg == default)
+ {
+ arg = Local(OperandType.I64);
+
+ block.Append(Operation(Instruction.LoadArgument, arg, Const(0)));
+ }
+
+ StoreLocals(block, intMask, RegisterType.Integer, mode, arg);
+ StoreLocals(block, vecMask, RegisterType.Vector, mode, arg);
+ }
+ }
+ }
+ }
+
+ private static bool HasContextLoad(BasicBlock block)
+ {
+ return StartsWith(block, Instruction.LoadFromContext) && block.Operations.First.SourcesCount == 0;
+ }
+
+ private static bool HasContextStore(BasicBlock block)
+ {
+ return EndsWith(block, Instruction.StoreToContext) && block.Operations.Last.SourcesCount == 0;
+ }
+
+ private static bool StartsWith(BasicBlock block, Instruction inst)
+ {
+ if (block.Operations.Count > 0)
+ {
+ Operation first = block.Operations.First;
+
+ return first != default && first.Instruction == inst;
+ }
+
+ return false;
+ }
+
+ private static bool EndsWith(BasicBlock block, Instruction inst)
+ {
+ if (block.Operations.Count > 0)
+ {
+ Operation last = block.Operations.Last;
+
+ return last != default && last.Instruction == inst;
+ }
+
+ return false;
+ }
+
+ private static RegisterMask GetMask(Register register)
+ {
+ long intMask = 0;
+ long vecMask = 0;
+
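+            // Each 64-bit mask is split in half: integer registers occupy bits 0-31 of IntMask and
+            // flags bits 32-63; VecMask is split the same way between vector registers and FP flags.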
+ switch (register.Type)
+ {
+ case RegisterType.Flag: intMask = (1L << RegsCount) << register.Index; break;
+ case RegisterType.Integer: intMask = 1L << register.Index; break;
+ case RegisterType.FpFlag: vecMask = (1L << RegsCount) << register.Index; break;
+ case RegisterType.Vector: vecMask = 1L << register.Index; break;
+ }
+
+ return new RegisterMask(intMask, vecMask);
+ }
+
+ private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value)
+ {
+ ref RegisterMask curValue = ref masks[blkIndex];
+
+ bool changed = curValue != value;
+
+ curValue = value;
+
+ return changed;
+ }
+
+ private static void LoadLocals(
+ BasicBlock block,
+ long inputs,
+ RegisterType baseType,
+ ExecutionMode mode,
+ Operation loadArg,
+ Operand arg)
+ {
+ while (inputs != 0)
+ {
+ int bit = 63 - BitOperations.LeadingZeroCount((ulong)inputs);
+
+ Operand dest = GetRegFromBit(bit, baseType, mode);
+ Operand offset = Const((long)NativeContext.GetRegisterOffset(dest.GetRegister()));
+ Operand addr = Local(OperandType.I64);
+
+ block.Operations.AddAfter(loadArg, Operation(Instruction.Load, dest, addr));
+ block.Operations.AddAfter(loadArg, Operation(Instruction.Add, addr, arg, offset));
+
+ inputs &= ~(1L << bit);
+ }
+ }
+
+ private static void StoreLocals(
+ BasicBlock block,
+ long outputs,
+ RegisterType baseType,
+ ExecutionMode mode,
+ Operand arg)
+ {
+ while (outputs != 0)
+ {
+ int bit = BitOperations.TrailingZeroCount(outputs);
+
+ Operand source = GetRegFromBit(bit, baseType, mode);
+ Operand offset = Const((long)NativeContext.GetRegisterOffset(source.GetRegister()));
+ Operand addr = Local(OperandType.I64);
+
+ block.Append(Operation(Instruction.Add, addr, arg, offset));
+ block.Append(Operation(Instruction.Store, default, addr, source));
+
+ outputs &= ~(1L << bit);
+ }
+ }
+
+ private static Operand GetRegFromBit(int bit, RegisterType baseType, ExecutionMode mode)
+ {
+ if (bit < RegsCount)
+ {
+ return Register(bit, baseType, GetOperandType(baseType, mode));
+ }
+ else if (baseType == RegisterType.Integer)
+ {
+ return Register(bit & RegsMask, RegisterType.Flag, OperandType.I32);
+ }
+ else if (baseType == RegisterType.Vector)
+ {
+ return Register(bit & RegsMask, RegisterType.FpFlag, OperandType.I32);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(bit));
+ }
+ }
+
+ private static OperandType GetOperandType(RegisterType type, ExecutionMode mode)
+ {
+ switch (type)
+ {
+ case RegisterType.Flag: return OperandType.I32;
+ case RegisterType.FpFlag: return OperandType.I32;
+ case RegisterType.Integer: return (mode == ExecutionMode.Aarch64) ? OperandType.I64 : OperandType.I32;
+ case RegisterType.Vector: return OperandType.V128;
+ }
+
+ throw new ArgumentException($"Invalid register type \"{type}\".");
+ }
+
+ private static bool EndsWithReturn(BasicBlock block)
+ {
+ Operation last = block.Operations.Last;
+
+ return last != default && last.Instruction == Instruction.Return;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/RejitRequest.cs b/src/ARMeilleure/Translation/RejitRequest.cs
new file mode 100644
index 00000000..1bed5c0a
--- /dev/null
+++ b/src/ARMeilleure/Translation/RejitRequest.cs
@@ -0,0 +1,16 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Translation
+{
+ struct RejitRequest
+ {
+ public ulong Address;
+ public ExecutionMode Mode;
+
+ public RejitRequest(ulong address, ExecutionMode mode)
+ {
+ Address = address;
+ Mode = mode;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Translation/SsaConstruction.cs b/src/ARMeilleure/Translation/SsaConstruction.cs
new file mode 100644
index 00000000..2b6efc11
--- /dev/null
+++ b/src/ARMeilleure/Translation/SsaConstruction.cs
@@ -0,0 +1,289 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Translation
+{
+ static partial class Ssa
+ {
+ private class DefMap
+ {
+ private readonly Dictionary<int, Operand> _map;
+ private readonly BitMap _phiMasks;
+
+ public DefMap()
+ {
+ _map = new Dictionary<int, Operand>();
+ _phiMasks = new BitMap(Allocators.Default, RegisterConsts.TotalCount);
+ }
+
+ public bool TryAddOperand(int key, Operand operand)
+ {
+ return _map.TryAdd(key, operand);
+ }
+
+ public bool TryGetOperand(int key, out Operand operand)
+ {
+ return _map.TryGetValue(key, out operand);
+ }
+
+ public bool AddPhi(int key)
+ {
+ return _phiMasks.Set(key);
+ }
+
+ public bool HasPhi(int key)
+ {
+ return _phiMasks.IsSet(key);
+ }
+ }
+
+ public static void Construct(ControlFlowGraph cfg)
+ {
+ var globalDefs = new DefMap[cfg.Blocks.Count];
+ var localDefs = new Operand[cfg.LocalsCount + RegisterConsts.TotalCount];
+
+ var dfPhiBlocks = new Queue<BasicBlock>();
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ globalDefs[block.Index] = new DefMap();
+ }
+
+ // First pass, get all defs and locals uses.
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ Operand src = node.GetSource(index);
+
+ if (TryGetId(src, out int srcKey))
+ {
+ Operand local = localDefs[srcKey];
+
+ if (local == default)
+ {
+ local = src;
+ }
+
+ node.SetSource(index, local);
+ }
+ }
+
+ Operand dest = node.Destination;
+
+ if (TryGetId(dest, out int destKey))
+ {
+ Operand local = Local(dest.Type);
+
+ localDefs[destKey] = local;
+
+ node.Destination = local;
+ }
+ }
+
+ for (int key = 0; key < localDefs.Length; key++)
+ {
+ Operand local = localDefs[key];
+
+ if (local == default)
+ {
+ continue;
+ }
+
+ globalDefs[block.Index].TryAddOperand(key, local);
+
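+                    // Seed phis on the iterated dominance frontier of the defining block, as in the
+                    // classic SSA construction of Cytron et al.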
+ dfPhiBlocks.Enqueue(block);
+
+ while (dfPhiBlocks.TryDequeue(out BasicBlock dfPhiBlock))
+ {
+ foreach (BasicBlock domFrontier in dfPhiBlock.DominanceFrontiers)
+ {
+ if (globalDefs[domFrontier.Index].AddPhi(key))
+ {
+ dfPhiBlocks.Enqueue(domFrontier);
+ }
+ }
+ }
+ }
+
+ Array.Clear(localDefs);
+ }
+
+ // Second pass, rename variables with definitions on different blocks.
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ Operand src = node.GetSource(index);
+
+ if (TryGetId(src, out int key))
+ {
+ Operand local = localDefs[key];
+
+ if (local == default)
+ {
+ local = FindDef(globalDefs, block, src);
+ localDefs[key] = local;
+ }
+
+ node.SetSource(index, local);
+ }
+ }
+ }
+
+ Array.Clear(localDefs);
+ }
+ }
+
+ private static Operand FindDef(DefMap[] globalDefs, BasicBlock current, Operand operand)
+ {
+ if (globalDefs[current.Index].HasPhi(GetId(operand)))
+ {
+ return InsertPhi(globalDefs, current, operand);
+ }
+
+ if (current != current.ImmediateDominator)
+ {
+ return FindDefOnPred(globalDefs, current.ImmediateDominator, operand);
+ }
+
+ return Undef();
+ }
+
+ private static Operand FindDefOnPred(DefMap[] globalDefs, BasicBlock current, Operand operand)
+ {
+ BasicBlock previous;
+
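+            // Walk up the dominator tree until a definition or a pending phi is found; the entry
+            // block is its own immediate dominator, which terminates the loop.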
+ do
+ {
+ DefMap defMap = globalDefs[current.Index];
+
+ int key = GetId(operand);
+
+ if (defMap.TryGetOperand(key, out Operand lastDef))
+ {
+ return lastDef;
+ }
+
+ if (defMap.HasPhi(key))
+ {
+ return InsertPhi(globalDefs, current, operand);
+ }
+
+ previous = current;
+ current = current.ImmediateDominator;
+ }
+ while (previous != current);
+
+ return Undef();
+ }
+
+ private static Operand InsertPhi(DefMap[] globalDefs, BasicBlock block, Operand operand)
+ {
+ // This block has a Phi that has not been materialized yet, but that
+ // would define a new version of the variable we're looking for. We need
+ // to materialize the Phi, add all the block/operand pairs into the Phi, and
+ // then use the definition from that Phi.
+ Operand local = Local(operand.Type);
+
+ Operation operation = Operation.Factory.PhiOperation(local, block.Predecessors.Count);
+
+ AddPhi(block, operation);
+
+ globalDefs[block.Index].TryAddOperand(GetId(operand), local);
+
+ PhiOperation phi = operation.AsPhi();
+
+ for (int index = 0; index < block.Predecessors.Count; index++)
+ {
+ BasicBlock predecessor = block.Predecessors[index];
+
+ phi.SetBlock(index, predecessor);
+ phi.SetSource(index, FindDefOnPred(globalDefs, predecessor, operand));
+ }
+
+ return local;
+ }
+
+ private static void AddPhi(BasicBlock block, Operation phi)
+ {
+ Operation node = block.Operations.First;
+
+ if (node != default)
+ {
+ while (node.ListNext != default && node.ListNext.Instruction == Instruction.Phi)
+ {
+ node = node.ListNext;
+ }
+ }
+
+ if (node != default && node.Instruction == Instruction.Phi)
+ {
+ block.Operations.AddAfter(node, phi);
+ }
+ else
+ {
+ block.Operations.AddFirst(phi);
+ }
+ }
+
+ private static bool TryGetId(Operand operand, out int result)
+ {
+ if (operand != default)
+ {
+ if (operand.Kind == OperandKind.Register)
+ {
+ Register reg = operand.GetRegister();
+
+ if (reg.Type == RegisterType.Integer)
+ {
+ result = reg.Index;
+ }
+ else if (reg.Type == RegisterType.Vector)
+ {
+ result = RegisterConsts.IntRegsCount + reg.Index;
+ }
+ else if (reg.Type == RegisterType.Flag)
+ {
+ result = RegisterConsts.IntAndVecRegsCount + reg.Index;
+ }
+ else /* if (reg.Type == RegisterType.FpFlag) */
+ {
+ result = RegisterConsts.FpFlagsOffset + reg.Index;
+ }
+
+ return true;
+ }
+ else if (operand.Kind == OperandKind.LocalVariable && operand.GetLocalNumber() > 0)
+ {
+ result = RegisterConsts.TotalCount + operand.GetLocalNumber() - 1;
+
+ return true;
+ }
+ }
+
+ result = -1;
+
+ return false;
+ }
+
+ private static int GetId(Operand operand)
+ {
+ if (!TryGetId(operand, out int key))
+ {
+ Debug.Fail("OperandKind must be Register or a numbered LocalVariable.");
+ }
+
+ return key;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/SsaDeconstruction.cs b/src/ARMeilleure/Translation/SsaDeconstruction.cs
new file mode 100644
index 00000000..cd6bcca1
--- /dev/null
+++ b/src/ARMeilleure/Translation/SsaDeconstruction.cs
@@ -0,0 +1,48 @@
+using ARMeilleure.IntermediateRepresentation;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.Translation
+{
+ static partial class Ssa
+ {
+ public static void Deconstruct(ControlFlowGraph cfg)
+ {
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ Operation operation = block.Operations.First;
+
+ while (operation != default && operation.Instruction == Instruction.Phi)
+ {
+ Operation nextNode = operation.ListNext;
+
+ Operand local = Local(operation.Destination.Type);
+
+ PhiOperation phi = operation.AsPhi();
+
+ for (int index = 0; index < phi.SourcesCount; index++)
+ {
+ BasicBlock predecessor = phi.GetBlock(cfg, index);
+
+ Operand source = phi.GetSource(index);
+
+ predecessor.Append(Operation(Instruction.Copy, local, source));
+
+ phi.SetSource(index, default);
+ }
+
+ Operation copyOp = Operation(Instruction.Copy, operation.Destination, local);
+
+ block.Operations.AddBefore(operation, copyOp);
+
+ operation.Destination = default;
+
+ block.Operations.Remove(operation);
+
+ operation = nextNode;
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/TranslatedFunction.cs b/src/ARMeilleure/Translation/TranslatedFunction.cs
new file mode 100644
index 00000000..f007883e
--- /dev/null
+++ b/src/ARMeilleure/Translation/TranslatedFunction.cs
@@ -0,0 +1,34 @@
+using ARMeilleure.Common;
+using System;
+
+namespace ARMeilleure.Translation
+{
+ class TranslatedFunction
+ {
+ private readonly GuestFunction _func; // Ensure that this delegate will not be garbage collected.
+
+ public IntPtr FuncPointer { get; }
+ public Counter<uint> CallCounter { get; }
+ public ulong GuestSize { get; }
+ public bool HighCq { get; }
+
+ public TranslatedFunction(GuestFunction func, IntPtr funcPointer, Counter<uint> callCounter, ulong guestSize, bool highCq)
+ {
+ _func = func;
+ FuncPointer = funcPointer;
+ CallCounter = callCounter;
+ GuestSize = guestSize;
+ HighCq = highCq;
+ }
+
+ public ulong Execute(State.ExecutionContext context)
+ {
+ return _func(context.NativeContextPtr);
+ }
+
+ public ulong Execute(WrapperFunction dispatcher, State.ExecutionContext context)
+ {
+ return dispatcher(context.NativeContextPtr, (ulong)FuncPointer);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/Translation/Translator.cs b/src/ARMeilleure/Translation/Translator.cs
new file mode 100644
index 00000000..f349c5eb
--- /dev/null
+++ b/src/ARMeilleure/Translation/Translator.cs
@@ -0,0 +1,576 @@
+using ARMeilleure.CodeGen;
+using ARMeilleure.Common;
+using ARMeilleure.Decoders;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.Instructions;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.Signal;
+using ARMeilleure.State;
+using ARMeilleure.Translation.Cache;
+using ARMeilleure.Translation.PTC;
+using Ryujinx.Common;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+using System.Threading;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Translation
+{
+ public class Translator
+ {
+ private static readonly AddressTable<ulong>.Level[] Levels64Bit =
+ new AddressTable<ulong>.Level[]
+ {
+ new(31, 17),
+ new(23, 8),
+ new(15, 8),
+ new( 7, 8),
+ new( 2, 5)
+ };
+
+ private static readonly AddressTable<ulong>.Level[] Levels32Bit =
+ new AddressTable<ulong>.Level[]
+ {
+ new(31, 17),
+ new(23, 8),
+ new(15, 8),
+ new( 7, 8),
+ new( 1, 6)
+ };
+
+ private readonly IJitMemoryAllocator _allocator;
+ private readonly ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>> _oldFuncs;
+
+ private readonly Ptc _ptc;
+
+ internal TranslatorCache<TranslatedFunction> Functions { get; }
+ internal AddressTable<ulong> FunctionTable { get; }
+ internal EntryTable<uint> CountTable { get; }
+ internal TranslatorStubs Stubs { get; }
+ internal TranslatorQueue Queue { get; }
+ internal IMemoryManager Memory { get; }
+
+ private volatile int _threadCount;
+
+        // FIXME: Remove this once the init logic of the emulator is redone.
+ public static readonly ManualResetEvent IsReadyForTranslation = new(false);
+
+ public Translator(IJitMemoryAllocator allocator, IMemoryManager memory, bool for64Bits)
+ {
+ _allocator = allocator;
+ Memory = memory;
+
+ _oldFuncs = new ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>>();
+
+ _ptc = new Ptc();
+
+ Queue = new TranslatorQueue();
+
+ JitCache.Initialize(allocator);
+
+ CountTable = new EntryTable<uint>();
+ Functions = new TranslatorCache<TranslatedFunction>();
+ FunctionTable = new AddressTable<ulong>(for64Bits ? Levels64Bit : Levels32Bit);
+ Stubs = new TranslatorStubs(this);
+
+ FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;
+
+ if (memory.Type.IsHostMapped())
+ {
+ NativeSignalHandler.InitializeSignalHandler(allocator.GetPageSize());
+ }
+ }
+
+ public IPtcLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled)
+ {
+ _ptc.Initialize(titleIdText, displayVersion, enabled, Memory.Type);
+ return _ptc;
+ }
+
+ public void PrepareCodeRange(ulong address, ulong size)
+ {
+ if (_ptc.Profiler.StaticCodeSize == 0)
+ {
+ _ptc.Profiler.StaticCodeStart = address;
+ _ptc.Profiler.StaticCodeSize = size;
+ }
+ }
+
+ public void Execute(State.ExecutionContext context, ulong address)
+ {
+ if (Interlocked.Increment(ref _threadCount) == 1)
+ {
+ IsReadyForTranslation.WaitOne();
+
+ if (_ptc.State == PtcState.Enabled)
+ {
+ Debug.Assert(Functions.Count == 0);
+ _ptc.LoadTranslations(this);
+ _ptc.MakeAndSaveTranslations(this);
+ }
+
+ _ptc.Profiler.Start();
+
+ _ptc.Disable();
+
+                // Simple heuristic, should be user configurable in the future (1 for 4 cores/HT or less, 2 for
+                // 6 cores + HT, etc). All threads are normal priority except for the last, which just fills as
+                // much of the last core as the OS lets it, at a low priority. If we only have one rejit thread,
+                // it should be normal priority as highCq code is performance critical.
+ //
+ // TODO: Use physical cores rather than logical. This only really makes sense for processors with
+ // hyperthreading. Requires OS specific code.
+ int unboundedThreadCount = Math.Max(1, (Environment.ProcessorCount - 6) / 3);
+ int threadCount = Math.Min(4, unboundedThreadCount);
+
+ for (int i = 0; i < threadCount; i++)
+ {
+ bool last = i != 0 && i == unboundedThreadCount - 1;
+
+ Thread backgroundTranslatorThread = new Thread(BackgroundTranslate)
+ {
+ Name = "CPU.BackgroundTranslatorThread." + i,
+ Priority = last ? ThreadPriority.Lowest : ThreadPriority.Normal
+ };
+
+ backgroundTranslatorThread.Start();
+ }
+ }
+
+ Statistics.InitializeTimer();
+
+ NativeInterface.RegisterThread(context, Memory, this);
+
+ if (Optimizations.UseUnmanagedDispatchLoop)
+ {
+ Stubs.DispatchLoop(context.NativeContextPtr, address);
+ }
+ else
+ {
+ do
+ {
+ address = ExecuteSingle(context, address);
+ }
+ while (context.Running && address != 0);
+ }
+
+ NativeInterface.UnregisterThread();
+
+ if (Interlocked.Decrement(ref _threadCount) == 0)
+ {
+ ClearJitCache();
+
+ Queue.Dispose();
+ Stubs.Dispose();
+ FunctionTable.Dispose();
+ CountTable.Dispose();
+
+ _ptc.Close();
+ _ptc.Profiler.Stop();
+
+ _ptc.Dispose();
+ _ptc.Profiler.Dispose();
+ }
+ }
+
+ private ulong ExecuteSingle(State.ExecutionContext context, ulong address)
+ {
+ TranslatedFunction func = GetOrTranslate(address, context.ExecutionMode);
+
+ Statistics.StartTimer();
+
+ ulong nextAddr = func.Execute(Stubs.ContextWrapper, context);
+
+ Statistics.StopTimer(address);
+
+ return nextAddr;
+ }
+
+ public ulong Step(State.ExecutionContext context, ulong address)
+ {
+ TranslatedFunction func = Translate(address, context.ExecutionMode, highCq: false, singleStep: true);
+
+ address = func.Execute(Stubs.ContextWrapper, context);
+
+ EnqueueForDeletion(address, func);
+
+ return address;
+ }
+
+ internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
+ {
+ if (!Functions.TryGetValue(address, out TranslatedFunction func))
+ {
+ func = Translate(address, mode, highCq: false);
+
+ TranslatedFunction oldFunc = Functions.GetOrAdd(address, func.GuestSize, func);
+
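+                // If another thread raced us and translated the same function first, discard our
+                // copy and use theirs.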
+ if (oldFunc != func)
+ {
+ JitCache.Unmap(func.FuncPointer);
+ func = oldFunc;
+ }
+
+ if (_ptc.Profiler.Enabled)
+ {
+ _ptc.Profiler.AddEntry(address, mode, highCq: false);
+ }
+
+ RegisterFunction(address, func);
+ }
+
+ return func;
+ }
+
+ internal void RegisterFunction(ulong guestAddress, TranslatedFunction func)
+ {
+ if (FunctionTable.IsValid(guestAddress) && (Optimizations.AllowLcqInFunctionTable || func.HighCq))
+ {
+ Volatile.Write(ref FunctionTable.GetValue(guestAddress), (ulong)func.FuncPointer);
+ }
+ }
+
+ internal TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq, bool singleStep = false)
+ {
+ var context = new ArmEmitterContext(
+ Memory,
+ CountTable,
+ FunctionTable,
+ Stubs,
+ address,
+ highCq,
+ _ptc.State != PtcState.Disabled,
+ mode: Aarch32Mode.User);
+
+ Logger.StartPass(PassName.Decoding);
+
+ Block[] blocks = Decoder.Decode(Memory, address, mode, highCq, singleStep ? DecoderMode.SingleInstruction : DecoderMode.MultipleBlocks);
+
+ Logger.EndPass(PassName.Decoding);
+
+ Logger.StartPass(PassName.Translation);
+
+ EmitSynchronization(context);
+
+ if (blocks[0].Address != address)
+ {
+ context.Branch(context.GetLabel(address));
+ }
+
+ ControlFlowGraph cfg = EmitAndGetCFG(context, blocks, out Range funcRange, out Counter<uint> counter);
+
+ ulong funcSize = funcRange.End - funcRange.Start;
+
+ Logger.EndPass(PassName.Translation, cfg);
+
+ Logger.StartPass(PassName.RegisterUsage);
+
+ RegisterUsage.RunPass(cfg, mode);
+
+ Logger.EndPass(PassName.RegisterUsage);
+
+ var retType = OperandType.I64;
+ var argTypes = new OperandType[] { OperandType.I64 };
+
+ var options = highCq ? CompilerOptions.HighCq : CompilerOptions.None;
+
+ if (context.HasPtc && !singleStep)
+ {
+ options |= CompilerOptions.Relocatable;
+ }
+
+ CompiledFunction compiledFunc = Compiler.Compile(cfg, argTypes, retType, options, RuntimeInformation.ProcessArchitecture);
+
+ if (context.HasPtc && !singleStep)
+ {
+ Hash128 hash = Ptc.ComputeHash(Memory, address, funcSize);
+
+ _ptc.WriteCompiledFunction(address, funcSize, hash, highCq, compiledFunc);
+ }
+
+ GuestFunction func = compiledFunc.MapWithPointer<GuestFunction>(out IntPtr funcPointer);
+
+ Allocators.ResetAll();
+
+ return new TranslatedFunction(func, funcPointer, counter, funcSize, highCq);
+ }
+
+ private void BackgroundTranslate()
+ {
+ while (_threadCount != 0 && Queue.TryDequeue(out RejitRequest request))
+ {
+ TranslatedFunction func = Translate(request.Address, request.Mode, highCq: true);
+
+ Functions.AddOrUpdate(request.Address, func.GuestSize, func, (key, oldFunc) =>
+ {
+ EnqueueForDeletion(key, oldFunc);
+ return func;
+ });
+
+ if (_ptc.Profiler.Enabled)
+ {
+ _ptc.Profiler.UpdateEntry(request.Address, request.Mode, highCq: true);
+ }
+
+ RegisterFunction(request.Address, func);
+ }
+ }
+
+ private readonly struct Range
+ {
+ public ulong Start { get; }
+ public ulong End { get; }
+
+ public Range(ulong start, ulong end)
+ {
+ Start = start;
+ End = end;
+ }
+ }
+
+ private static ControlFlowGraph EmitAndGetCFG(
+ ArmEmitterContext context,
+ Block[] blocks,
+ out Range range,
+ out Counter<uint> counter)
+ {
+ counter = null;
+
+ ulong rangeStart = ulong.MaxValue;
+ ulong rangeEnd = 0;
+
+ for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+ {
+ Block block = blocks[blkIndex];
+
+ if (!block.Exit)
+ {
+ if (rangeStart > block.Address)
+ {
+ rangeStart = block.Address;
+ }
+
+ if (rangeEnd < block.EndAddress)
+ {
+ rangeEnd = block.EndAddress;
+ }
+ }
+
+ if (block.Address == context.EntryAddress)
+ {
+ if (!context.HighCq)
+ {
+ EmitRejitCheck(context, out counter);
+ }
+
+ context.ClearQcFlag();
+ }
+
+ context.CurrBlock = block;
+
+ context.MarkLabel(context.GetLabel(block.Address));
+
+ if (block.Exit)
+ {
+                    // This option is left here as it may be useful if we need to return to managed code rather
+                    // than tail call in the future (e.g. for debugging).
+ bool useReturns = false;
+
+ InstEmitFlowHelper.EmitVirtualJump(context, Const(block.Address), isReturn: useReturns);
+ }
+ else
+ {
+ for (int opcIndex = 0; opcIndex < block.OpCodes.Count; opcIndex++)
+ {
+ OpCode opCode = block.OpCodes[opcIndex];
+
+ context.CurrOp = opCode;
+
+ bool isLastOp = opcIndex == block.OpCodes.Count - 1;
+
+ if (isLastOp)
+ {
+ context.SyncQcFlag();
+
+ if (block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
+ {
+ EmitSynchronization(context);
+ }
+ }
+
+ Operand lblPredicateSkip = default;
+
+ if (context.IsInIfThenBlock && context.CurrentIfThenBlockCond != Condition.Al)
+ {
+ lblPredicateSkip = Label();
+
+ InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, context.CurrentIfThenBlockCond.Invert());
+ }
+
+ if (opCode is OpCode32 op && op.Cond < Condition.Al)
+ {
+ lblPredicateSkip = Label();
+
+ InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, op.Cond.Invert());
+ }
+
+ if (opCode.Instruction.Emitter != null)
+ {
+ opCode.Instruction.Emitter(context);
+ }
+ else
+ {
+ throw new InvalidOperationException($"Invalid instruction \"{opCode.Instruction.Name}\".");
+ }
+
+ if (lblPredicateSkip != default)
+ {
+ context.MarkLabel(lblPredicateSkip);
+ }
+
+ if (context.IsInIfThenBlock && opCode.Instruction.Name != InstName.It)
+ {
+ context.AdvanceIfThenBlockState();
+ }
+ }
+ }
+ }
+
+ range = new Range(rangeStart, rangeEnd);
+
+ return context.GetControlFlowGraph();
+ }
+
+ internal static void EmitRejitCheck(ArmEmitterContext context, out Counter<uint> counter)
+ {
+ const int MinsCallForRejit = 100;
+
+ counter = new Counter<uint>(context.CountTable);
+
+ Operand lblEnd = Label();
+
+ Operand address = !context.HasPtc ?
+ Const(ref counter.Value) :
+ Const(ref counter.Value, Ptc.CountTableSymbol);
+
+ Operand curCount = context.Load(OperandType.I32, address);
+ Operand count = context.Add(curCount, Const(1));
+ context.Store(address, count);
+ context.BranchIf(lblEnd, curCount, Const(MinsCallForRejit), Comparison.NotEqual, BasicBlockFrequency.Cold);
+
+ context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.EnqueueForRejit)), Const(context.EntryAddress));
+
+ context.MarkLabel(lblEnd);
+ }
+
+ internal static void EmitSynchronization(EmitterContext context)
+ {
+ long countOffs = NativeContext.GetCounterOffset();
+
+ Operand lblNonZero = Label();
+ Operand lblExit = Label();
+
+ Operand countAddr = context.Add(context.LoadArgument(OperandType.I64, 0), Const(countOffs));
+ Operand count = context.Load(OperandType.I32, countAddr);
+ context.BranchIfTrue(lblNonZero, count, BasicBlockFrequency.Cold);
+
+ Operand running = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.CheckSynchronization)));
+ context.BranchIfTrue(lblExit, running, BasicBlockFrequency.Cold);
+
+ context.Return(Const(0L));
+
+ context.MarkLabel(lblNonZero);
+ count = context.Subtract(count, Const(1));
+ context.Store(countAddr, count);
+
+ context.MarkLabel(lblExit);
+ }
+
+ public void InvalidateJitCacheRegion(ulong address, ulong size)
+ {
+ ulong[] overlapAddresses = Array.Empty<ulong>();
+
+ int overlapsCount = Functions.GetOverlaps(address, size, ref overlapAddresses);
+
+ if (overlapsCount != 0)
+ {
+                // If rejit is running, stop it as it may be trying to rejit a function in the invalidated region.
+ ClearRejitQueue(allowRequeue: true);
+ }
+
+ for (int index = 0; index < overlapsCount; index++)
+ {
+ ulong overlapAddress = overlapAddresses[index];
+
+ if (Functions.TryGetValue(overlapAddress, out TranslatedFunction overlap))
+ {
+ Functions.Remove(overlapAddress);
+ Volatile.Write(ref FunctionTable.GetValue(overlapAddress), FunctionTable.Fill);
+ EnqueueForDeletion(overlapAddress, overlap);
+ }
+ }
+
+            // TODO: Remove overlapping functions from the JitCache as well.
+ // This should be done safely, with a mechanism to ensure the function is not being executed.
+ }
+
+ internal void EnqueueForRejit(ulong guestAddress, ExecutionMode mode)
+ {
+ Queue.Enqueue(guestAddress, mode);
+ }
+
+ private void EnqueueForDeletion(ulong guestAddress, TranslatedFunction func)
+ {
+ _oldFuncs.Enqueue(new(guestAddress, func));
+ }
+
+ private void ClearJitCache()
+ {
+ // Ensure no attempt will be made to compile new functions due to rejit.
+ ClearRejitQueue(allowRequeue: false);
+
+ List<TranslatedFunction> functions = Functions.AsList();
+
+ foreach (var func in functions)
+ {
+ JitCache.Unmap(func.FuncPointer);
+
+ func.CallCounter?.Dispose();
+ }
+
+ Functions.Clear();
+
+ while (_oldFuncs.TryDequeue(out var kv))
+ {
+ JitCache.Unmap(kv.Value.FuncPointer);
+
+ kv.Value.CallCounter?.Dispose();
+ }
+ }
+
+ private void ClearRejitQueue(bool allowRequeue)
+ {
+ if (!allowRequeue)
+ {
+ Queue.Clear();
+
+ return;
+ }
+
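+            // Dequeue every pending request and reset its call counter, so the function can reach
+            // the rejit threshold and be enqueued again later.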
+ lock (Queue.Sync)
+ {
+ while (Queue.Count > 0 && Queue.TryDequeue(out RejitRequest request))
+ {
+ if (Functions.TryGetValue(request.Address, out var func) && func.CallCounter != null)
+ {
+ Volatile.Write(ref func.CallCounter.Value, 0);
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Translation/TranslatorCache.cs b/src/ARMeilleure/Translation/TranslatorCache.cs
new file mode 100644
index 00000000..11286381
--- /dev/null
+++ b/src/ARMeilleure/Translation/TranslatorCache.cs
@@ -0,0 +1,95 @@
+using System;
+using System.Collections.Generic;
+using System.Threading;
+
+namespace ARMeilleure.Translation
+{
+ internal class TranslatorCache<T>
+ {
+ private readonly IntervalTree<ulong, T> _tree;
+ private readonly ReaderWriterLock _treeLock;
+
+ public int Count => _tree.Count;
+
+ public TranslatorCache()
+ {
+ _tree = new IntervalTree<ulong, T>();
+ _treeLock = new ReaderWriterLock();
+ }
+
+ public bool TryAdd(ulong address, ulong size, T value)
+ {
+ return AddOrUpdate(address, size, value, null);
+ }
+
+ public bool AddOrUpdate(ulong address, ulong size, T value, Func<ulong, T, T> updateFactoryCallback)
+ {
+ _treeLock.AcquireWriterLock(Timeout.Infinite);
+ bool result = _tree.AddOrUpdate(address, address + size, value, updateFactoryCallback);
+ _treeLock.ReleaseWriterLock();
+
+ return result;
+ }
+
+ public T GetOrAdd(ulong address, ulong size, T value)
+ {
+ _treeLock.AcquireWriterLock(Timeout.Infinite);
+ value = _tree.GetOrAdd(address, address + size, value);
+ _treeLock.ReleaseWriterLock();
+
+ return value;
+ }
+
+ public bool Remove(ulong address)
+ {
+ _treeLock.AcquireWriterLock(Timeout.Infinite);
+ bool removed = _tree.Remove(address) != 0;
+ _treeLock.ReleaseWriterLock();
+
+ return removed;
+ }
+
+ public void Clear()
+ {
+ _treeLock.AcquireWriterLock(Timeout.Infinite);
+ _tree.Clear();
+ _treeLock.ReleaseWriterLock();
+ }
+
+ public bool ContainsKey(ulong address)
+ {
+ _treeLock.AcquireReaderLock(Timeout.Infinite);
+ bool result = _tree.ContainsKey(address);
+ _treeLock.ReleaseReaderLock();
+
+ return result;
+ }
+
+ public bool TryGetValue(ulong address, out T value)
+ {
+ _treeLock.AcquireReaderLock(Timeout.Infinite);
+ bool result = _tree.TryGet(address, out value);
+ _treeLock.ReleaseReaderLock();
+
+ return result;
+ }
+
+ public int GetOverlaps(ulong address, ulong size, ref ulong[] overlaps)
+ {
+ _treeLock.AcquireReaderLock(Timeout.Infinite);
+ int count = _tree.Get(address, address + size, ref overlaps);
+ _treeLock.ReleaseReaderLock();
+
+ return count;
+ }
+
+ public List<T> AsList()
+ {
+ _treeLock.AcquireReaderLock(Timeout.Infinite);
+ List<T> list = _tree.AsList();
+ _treeLock.ReleaseReaderLock();
+
+ return list;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Translation/TranslatorQueue.cs b/src/ARMeilleure/Translation/TranslatorQueue.cs
new file mode 100644
index 00000000..fc0aa64f
--- /dev/null
+++ b/src/ARMeilleure/Translation/TranslatorQueue.cs
@@ -0,0 +1,121 @@
+using ARMeilleure.Diagnostics;
+using ARMeilleure.State;
+using System;
+using System.Collections.Generic;
+using System.Threading;
+
+namespace ARMeilleure.Translation
+{
+ /// <summary>
+ /// Represents a queue of <see cref="RejitRequest"/>.
+ /// </summary>
+ /// <remarks>
+    /// This does not necessarily behave like a queue, i.e. a FIFO collection; requests are stored in a stack.
+ /// </remarks>
+ sealed class TranslatorQueue : IDisposable
+ {
+ private bool _disposed;
+ private readonly Stack<RejitRequest> _requests;
+ private readonly HashSet<ulong> _requestAddresses;
+
+ /// <summary>
+ /// Gets the object used to synchronize access to the <see cref="TranslatorQueue"/>.
+ /// </summary>
+ public object Sync { get; }
+
+ /// <summary>
+ /// Gets the number of requests in the <see cref="TranslatorQueue"/>.
+ /// </summary>
+ public int Count => _requests.Count;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="TranslatorQueue"/> class.
+ /// </summary>
+ public TranslatorQueue()
+ {
+ Sync = new object();
+
+ _requests = new Stack<RejitRequest>();
+ _requestAddresses = new HashSet<ulong>();
+ }
+
+ /// <summary>
+ /// Enqueues a request with the specified <paramref name="address"/> and <paramref name="mode"/>.
+ /// </summary>
+ /// <param name="address">Address of request</param>
+ /// <param name="mode"><see cref="ExecutionMode"/> of request</param>
+ public void Enqueue(ulong address, ExecutionMode mode)
+ {
+ lock (Sync)
+ {
+ if (_requestAddresses.Add(address))
+ {
+ _requests.Push(new RejitRequest(address, mode));
+
+ TranslatorEventSource.Log.RejitQueueAdd(1);
+
+ Monitor.Pulse(Sync);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Tries to dequeue a <see cref="RejitRequest"/>. This will block the thread until a <see cref="RejitRequest"/>
+ /// is enqueued or the <see cref="TranslatorQueue"/> is disposed.
+ /// </summary>
+ /// <param name="result"><see cref="RejitRequest"/> dequeued</param>
+ /// <returns><see langword="true"/> on success; otherwise <see langword="false"/></returns>
+ public bool TryDequeue(out RejitRequest result)
+ {
+ while (!_disposed)
+ {
+ lock (Sync)
+ {
+ if (_requests.TryPop(out result))
+ {
+ _requestAddresses.Remove(result.Address);
+
+ TranslatorEventSource.Log.RejitQueueAdd(-1);
+
+ return true;
+ }
+
+ Monitor.Wait(Sync);
+ }
+ }
+
+ result = default;
+
+ return false;
+ }
+
+ /// <summary>
+ /// Clears the <see cref="TranslatorQueue"/>.
+ /// </summary>
+ public void Clear()
+ {
+ lock (Sync)
+ {
+ TranslatorEventSource.Log.RejitQueueAdd(-_requests.Count);
+
+ _requests.Clear();
+ _requestAddresses.Clear();
+
+ Monitor.PulseAll(Sync);
+ }
+ }
+
+ /// <summary>
+ /// Releases all resources used by the <see cref="TranslatorQueue"/> instance.
+ /// </summary>
+ public void Dispose()
+ {
+ if (!_disposed)
+ {
+ _disposed = true;
+
+ Clear();
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Translation/TranslatorStubs.cs b/src/ARMeilleure/Translation/TranslatorStubs.cs
new file mode 100644
index 00000000..69648df4
--- /dev/null
+++ b/src/ARMeilleure/Translation/TranslatorStubs.cs
@@ -0,0 +1,312 @@
+using ARMeilleure.Instructions;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation.Cache;
+using System;
+using System.Reflection;
+using System.Runtime.InteropServices;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Translation
+{
+ /// <summary>
+ /// Represents a stub manager.
+ /// </summary>
+ class TranslatorStubs : IDisposable
+ {
+ private static readonly Lazy<IntPtr> _slowDispatchStub = new(GenerateSlowDispatchStub, isThreadSafe: true);
+
+ private bool _disposed;
+
+ private readonly Translator _translator;
+ private readonly Lazy<IntPtr> _dispatchStub;
+ private readonly Lazy<DispatcherFunction> _dispatchLoop;
+ private readonly Lazy<WrapperFunction> _contextWrapper;
+
+ /// <summary>
+ /// Gets the dispatch stub.
+ /// </summary>
+ /// <exception cref="ObjectDisposedException"><see cref="TranslatorStubs"/> instance was disposed</exception>
+ public IntPtr DispatchStub
+ {
+ get
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ return _dispatchStub.Value;
+ }
+ }
+
+ /// <summary>
+ /// Gets the slow dispatch stub.
+ /// </summary>
+ /// <exception cref="ObjectDisposedException"><see cref="TranslatorStubs"/> instance was disposed</exception>
+ public IntPtr SlowDispatchStub
+ {
+ get
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ return _slowDispatchStub.Value;
+ }
+ }
+
+ /// <summary>
+ /// Gets the dispatch loop function.
+ /// </summary>
+ /// <exception cref="ObjectDisposedException"><see cref="TranslatorStubs"/> instance was disposed</exception>
+ public DispatcherFunction DispatchLoop
+ {
+ get
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ return _dispatchLoop.Value;
+ }
+ }
+
+ /// <summary>
+ /// Gets the context wrapper function.
+ /// </summary>
+ /// <exception cref="ObjectDisposedException"><see cref="TranslatorStubs"/> instance was disposed</exception>
+ public WrapperFunction ContextWrapper
+ {
+ get
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ return _contextWrapper.Value;
+ }
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="TranslatorStubs"/> class with the specified
+ /// <see cref="Translator"/> instance.
+ /// </summary>
+ /// <param name="translator"><see cref="Translator"/> instance to use</param>
+ /// <exception cref="ArgumentNullException"><paramref name="translator"/> is null</exception>
+ public TranslatorStubs(Translator translator)
+ {
+ ArgumentNullException.ThrowIfNull(translator);
+
+ _translator = translator;
+ _dispatchStub = new(GenerateDispatchStub, isThreadSafe: true);
+ _dispatchLoop = new(GenerateDispatchLoop, isThreadSafe: true);
+ _contextWrapper = new(GenerateContextWrapper, isThreadSafe: true);
+ }
+
+ /// <summary>
+ /// Releases all resources used by the <see cref="TranslatorStubs"/> instance.
+ /// </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+
+ /// <summary>
+ /// Releases all unmanaged and optionally managed resources used by the <see cref="TranslatorStubs"/> instance.
+ /// </summary>
+ /// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resouces</param>
+ protected virtual void Dispose(bool disposing)
+ {
+ if (!_disposed)
+ {
+ if (_dispatchStub.IsValueCreated)
+ {
+ JitCache.Unmap(_dispatchStub.Value);
+ }
+
+ if (_dispatchLoop.IsValueCreated)
+ {
+ JitCache.Unmap(Marshal.GetFunctionPointerForDelegate(_dispatchLoop.Value));
+ }
+
+ _disposed = true;
+ }
+ }
+
+ /// <summary>
+ /// Frees resources used by the <see cref="TranslatorStubs"/> instance.
+ /// </summary>
+ ~TranslatorStubs()
+ {
+ Dispose(false);
+ }
+
+ /// <summary>
+ /// Generates a <see cref="DispatchStub"/>.
+ /// </summary>
+ /// <returns>Generated <see cref="DispatchStub"/></returns>
+ private IntPtr GenerateDispatchStub()
+ {
+ var context = new EmitterContext();
+
+ Operand lblFallback = Label();
+ Operand lblEnd = Label();
+
+ // Load the target guest address from the native context.
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+ Operand guestAddress = context.Load(OperandType.I64,
+ context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset())));
+
+ // Check if guest address is within range of the AddressTable.
+ Operand masked = context.BitwiseAnd(guestAddress, Const(~_translator.FunctionTable.Mask));
+ context.BranchIfTrue(lblFallback, masked);
+
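+            // Walk the function table one level at a time; a null intermediate page means the
+            // address has no entry yet, so fall back to the slow path.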
+ Operand index = default;
+ Operand page = Const((long)_translator.FunctionTable.Base);
+
+ for (int i = 0; i < _translator.FunctionTable.Levels.Length; i++)
+ {
+ ref var level = ref _translator.FunctionTable.Levels[i];
+
+                // level.Mask is not used directly because it is often bigger than 32 bits, and so could not
+                // be encoded as an immediate by x86's bitwise AND operation.
+ Operand mask = Const(level.Mask >> level.Index);
+
+ index = context.BitwiseAnd(context.ShiftRightUI(guestAddress, Const(level.Index)), mask);
+
+ if (i < _translator.FunctionTable.Levels.Length - 1)
+ {
+ page = context.Load(OperandType.I64, context.Add(page, context.ShiftLeft(index, Const(3))));
+ context.BranchIfFalse(lblFallback, page);
+ }
+ }
+
+            Operand hostAddressAddr = context.Add(page, context.ShiftLeft(index, Const(3)));
+            Operand hostAddress = context.Load(OperandType.I64, hostAddressAddr);
+ context.Tailcall(hostAddress, nativeContext);
+
+ context.MarkLabel(lblFallback);
+ hostAddress = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)), guestAddress);
+ context.Tailcall(hostAddress, nativeContext);
+
+ var cfg = context.GetControlFlowGraph();
+ var retType = OperandType.I64;
+ var argTypes = new[] { OperandType.I64 };
+
+ var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<GuestFunction>();
+
+ return Marshal.GetFunctionPointerForDelegate(func);
+ }
+
+ /// <summary>
+ /// Generates a <see cref="SlowDispatchStub"/>.
+ /// </summary>
+ /// <returns>Generated <see cref="SlowDispatchStub"/></returns>
+ private static IntPtr GenerateSlowDispatchStub()
+ {
+ var context = new EmitterContext();
+
+ // Load the target guest address from the native context.
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+ Operand guestAddress = context.Load(OperandType.I64,
+ context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset())));
+
+ MethodInfo getFuncAddress = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress));
+ Operand hostAddress = context.Call(getFuncAddress, guestAddress);
+ context.Tailcall(hostAddress, nativeContext);
+
+ var cfg = context.GetControlFlowGraph();
+ var retType = OperandType.I64;
+ var argTypes = new[] { OperandType.I64 };
+
+ var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<GuestFunction>();
+
+ return Marshal.GetFunctionPointerForDelegate(func);
+ }
+
+ /// <summary>
+ /// Emits code that syncs FP state before executing guest code, or returns it to normal.
+ /// </summary>
+ /// <param name="context">Emitter context for the method</param>
+ /// <param name="nativeContext">Pointer to the native context</param>
+ /// <param name="enter">True if entering guest code, false otherwise</param>
+ private void EmitSyncFpContext(EmitterContext context, Operand nativeContext, bool enter)
+ {
+ if (enter)
+ {
+ InstEmitSimdHelper.EnterArmFpMode(context, (flag) =>
+ {
+ Operand flagAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRegisterOffset(new Register((int)flag, RegisterType.FpFlag))));
+ return context.Load(OperandType.I32, flagAddress);
+ });
+ }
+ else
+ {
+ InstEmitSimdHelper.ExitArmFpMode(context, (flag, value) =>
+ {
+ Operand flagAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRegisterOffset(new Register((int)flag, RegisterType.FpFlag))));
+ context.Store(flagAddress, value);
+ });
+ }
+ }
+
+ /// <summary>
+ /// Generates a <see cref="DispatchLoop"/> function.
+ /// </summary>
+ /// <returns><see cref="DispatchLoop"/> function</returns>
+ private DispatcherFunction GenerateDispatchLoop()
+ {
+ var context = new EmitterContext();
+
+ Operand beginLbl = Label();
+ Operand endLbl = Label();
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+ Operand guestAddress = context.Copy(
+ context.AllocateLocal(OperandType.I64),
+ context.LoadArgument(OperandType.I64, 1));
+
+ Operand runningAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRunningOffset()));
+ Operand dispatchAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset()));
+
+ EmitSyncFpContext(context, nativeContext, true);
+
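+            // Publish the next guest address, call the dispatch stub, and loop while the stub
+            // returns a nonzero address and the context is still marked as running.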
+ context.MarkLabel(beginLbl);
+ context.Store(dispatchAddress, guestAddress);
+ context.Copy(guestAddress, context.Call(Const((ulong)DispatchStub), OperandType.I64, nativeContext));
+ context.BranchIfFalse(endLbl, guestAddress);
+ context.BranchIfFalse(endLbl, context.Load(OperandType.I32, runningAddress));
+ context.Branch(beginLbl);
+
+ context.MarkLabel(endLbl);
+
+ EmitSyncFpContext(context, nativeContext, false);
+
+ context.Return();
+
+ var cfg = context.GetControlFlowGraph();
+ var retType = OperandType.None;
+ var argTypes = new[] { OperandType.I64, OperandType.I64 };
+
+ return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DispatcherFunction>();
+ }
+
+ /// <summary>
+ /// Generates a <see cref="ContextWrapper"/> function.
+ /// </summary>
+ /// <returns><see cref="ContextWrapper"/> function</returns>
+ private WrapperFunction GenerateContextWrapper()
+ {
+ var context = new EmitterContext();
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+ Operand guestMethod = context.LoadArgument(OperandType.I64, 1);
+
+ EmitSyncFpContext(context, nativeContext, true);
+ Operand returnValue = context.Call(guestMethod, OperandType.I64, nativeContext);
+ EmitSyncFpContext(context, nativeContext, false);
+
+ context.Return(returnValue);
+
+ var cfg = context.GetControlFlowGraph();
+ var retType = OperandType.I64;
+ var argTypes = new[] { OperandType.I64, OperandType.I64 };
+
+ return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<WrapperFunction>();
+ }
+ }
+}
diff --git a/src/ARMeilleure/Translation/TranslatorTestMethods.cs b/src/ARMeilleure/Translation/TranslatorTestMethods.cs
new file mode 100644
index 00000000..ab96019a
--- /dev/null
+++ b/src/ARMeilleure/Translation/TranslatorTestMethods.cs
@@ -0,0 +1,148 @@
+using ARMeilleure.CodeGen.X86;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Runtime.InteropServices;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Translation
+{
+ public static class TranslatorTestMethods
+ {
+ public delegate int FpFlagsPInvokeTest(IntPtr managedMethod);
+
+ private static bool SetPlatformFtz(EmitterContext context, bool ftz)
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
+
+ if (ftz)
+ {
+ mxcsr = context.BitwiseOr(mxcsr, Const((int)(Mxcsr.Ftz | Mxcsr.Um | Mxcsr.Dm)));
+ }
+ else
+ {
+ mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)Mxcsr.Ftz));
+ }
+
+ context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
+
+ return true;
+ }
+ else if (Optimizations.UseAdvSimd)
+ {
+ Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
+
+ if (ftz)
+ {
+ fpcr = context.BitwiseOr(fpcr, Const((int)FPCR.Fz));
+ }
+ else
+ {
+ fpcr = context.BitwiseAnd(fpcr, Const(~(int)FPCR.Fz));
+ }
+
+ context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
+
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ private static Operand FpBitsToInt(EmitterContext context, Operand fp)
+ {
+ Operand vec = context.VectorInsert(context.VectorZero(), fp, 0);
+ return context.VectorExtract(OperandType.I32, vec, 0);
+ }
+
+ public static FpFlagsPInvokeTest GenerateFpFlagsPInvokeTest()
+ {
+ EmitterContext context = new EmitterContext();
+
+ Operand methodAddress = context.Copy(context.LoadArgument(OperandType.I64, 0));
+
+            // Verify that the default .NET FP state does not flush denormals to zero.
+ // This is required for SoftFloat to function.
+
+ // Denormal + zero != 0
+
+ Operand denormal = ConstF(BitConverter.Int32BitsToSingle(1)); // 1.40129846432e-45
+ Operand zeroF = ConstF(0f);
+ Operand zero = Const(0);
+
+ Operand result = context.Add(zeroF, denormal);
+
+ // Must not be zero.
+
+ Operand correct1Label = Label();
+
+ context.BranchIfFalse(correct1Label, context.ICompareEqual(FpBitsToInt(context, result), zero));
+
+ context.Return(Const(1));
+
+ context.MarkLabel(correct1Label);
+
+ // Set flush to zero flag. If unsupported by the backend, just return true.
+
+ if (!SetPlatformFtz(context, true))
+ {
+ context.Return(Const(0));
+ }
+
+ // Denormal + zero == 0
+
+ Operand resultFz = context.Add(zeroF, denormal);
+
+ // Must equal zero.
+
+ Operand correct2Label = Label();
+
+ context.BranchIfTrue(correct2Label, context.ICompareEqual(FpBitsToInt(context, resultFz), zero));
+
+ SetPlatformFtz(context, false);
+
+ context.Return(Const(2));
+
+ context.MarkLabel(correct2Label);
+
+ // Call a managed method. This method should not change Fz state.
+
+ context.Call(methodAddress, OperandType.None);
+
+ // Denormal + zero == 0
+
+ Operand resultFz2 = context.Add(zeroF, denormal);
+
+ // Must equal zero.
+
+ Operand correct3Label = Label();
+
+ context.BranchIfTrue(correct3Label, context.ICompareEqual(FpBitsToInt(context, resultFz2), zero));
+
+ SetPlatformFtz(context, false);
+
+ context.Return(Const(3));
+
+ context.MarkLabel(correct3Label);
+
+ // Success.
+
+ SetPlatformFtz(context, false);
+
+ context.Return(Const(0));
+
+ // Compile and return the function.
+
+ ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+ OperandType[] argTypes = new OperandType[] { OperandType.I64 };
+
+ return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<FpFlagsPInvokeTest>();
+ }
+ }
+}