diff options
Diffstat (limited to 'ARMeilleure/Translation')
| -rw-r--r-- | ARMeilleure/Translation/ArmEmitterContext.cs | 15 | ||||
| -rw-r--r-- | ARMeilleure/Translation/DirectCallStubs.cs | 131 | ||||
| -rw-r--r-- | ARMeilleure/Translation/EmitterContext.cs | 17 | ||||
| -rw-r--r-- | ARMeilleure/Translation/JitCache.cs | 11 | ||||
| -rw-r--r-- | ARMeilleure/Translation/JumpTable.cs | 149 | ||||
| -rw-r--r-- | ARMeilleure/Translation/TranslatedFunction.cs | 9 | ||||
| -rw-r--r-- | ARMeilleure/Translation/Translator.cs | 48 |
7 files changed, 360 insertions, 20 deletions
diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs index d35e985e..d1a2c92d 100644 --- a/ARMeilleure/Translation/ArmEmitterContext.cs +++ b/ARMeilleure/Translation/ArmEmitterContext.cs @@ -41,10 +41,19 @@ namespace ARMeilleure.Translation public Aarch32Mode Mode { get; } - public ArmEmitterContext(MemoryManager memory, Aarch32Mode mode) + public JumpTable JumpTable { get; } + + public long BaseAddress { get; } + + public bool HighCq { get; } + + public ArmEmitterContext(MemoryManager memory, JumpTable jumpTable, long baseAddress, bool highCq, Aarch32Mode mode) { - Memory = memory; - Mode = mode; + Memory = memory; + JumpTable = jumpTable; + BaseAddress = baseAddress; + HighCq = highCq; + Mode = mode; _labels = new Dictionary<ulong, Operand>(); } diff --git a/ARMeilleure/Translation/DirectCallStubs.cs b/ARMeilleure/Translation/DirectCallStubs.cs new file mode 100644 index 00000000..e6e87b2b --- /dev/null +++ b/ARMeilleure/Translation/DirectCallStubs.cs @@ -0,0 +1,131 @@ +using ARMeilleure.Instructions; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using System; +using System.Runtime.InteropServices; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Translation +{ + static class DirectCallStubs + { + private delegate long GuestFunction(IntPtr nativeContextPtr); + + private static GuestFunction _directCallStub; + private static GuestFunction _directTailCallStub; + private static GuestFunction _indirectCallStub; + private static GuestFunction _indirectTailCallStub; + + private static object _lock; + private static bool _initialized; + + static DirectCallStubs() + { + _lock = new object(); + } + + public static void InitializeStubs() + { + if (_initialized) return; + lock (_lock) + { + if (_initialized) return; + _directCallStub = GenerateDirectCallStub(false); + _directTailCallStub = GenerateDirectCallStub(true); + _indirectCallStub = GenerateIndirectCallStub(false); + _indirectTailCallStub = GenerateIndirectCallStub(true); + _initialized = true; + } + } + + public static IntPtr DirectCallStub(bool tailCall) + { + return Marshal.GetFunctionPointerForDelegate(tailCall ? _directTailCallStub : _directCallStub); + } + + public static IntPtr IndirectCallStub(bool tailCall) + { + return Marshal.GetFunctionPointerForDelegate(tailCall ? _indirectTailCallStub : _indirectCallStub); + } + + private static void EmitCall(EmitterContext context, Operand address, bool tailCall) + { + if (tailCall) + { + context.Tailcall(address, context.LoadArgument(OperandType.I64, 0)); + } + else + { + context.Return(context.Call(address, OperandType.I64, context.LoadArgument(OperandType.I64, 0))); + } + } + + /// <summary> + /// Generates a stub that is used to find function addresses. Used for direct calls when their jump table does not have the host address yet. + /// Takes a NativeContext like a translated guest function, and extracts the target address from the NativeContext. + /// When the target function is compiled in highCq, all table entries are updated to point to that function instead of this stub by the translator. + /// </summary> + private static GuestFunction GenerateDirectCallStub(bool tailCall) + { + EmitterContext context = new EmitterContext(); + + Operand nativeContextPtr = context.LoadArgument(OperandType.I64, 0); + + Operand address = context.Load(OperandType.I64, context.Add(nativeContextPtr, Const((long)NativeContext.GetCallAddressOffset()))); + + address = context.BitwiseOr(address, Const(address.Type, 1)); // Set call flag. + Operand functionAddr = context.Call(new _U64_U64(NativeInterface.GetFunctionAddress), address); + EmitCall(context, functionAddr, tailCall); + + ControlFlowGraph cfg = context.GetControlFlowGraph(); + + OperandType[] argTypes = new OperandType[] + { + OperandType.I64 + }; + + return Compiler.Compile<GuestFunction>( + cfg, + argTypes, + OperandType.I64, + CompilerOptions.HighCq); + } + + /// <summary> + /// Generates a stub that is used to find function addresses and add them to an indirect table. + /// Used for indirect calls entries (already claimed) when their jump table does not have the host address yet. + /// Takes a NativeContext like a translated guest function, and extracts the target indirect table entry from the NativeContext. + /// If the function we find is highCq, the entry in the table is updated to point to that function rather than this stub. + /// </summary> + private static GuestFunction GenerateIndirectCallStub(bool tailCall) + { + EmitterContext context = new EmitterContext(); + + Operand nativeContextPtr = context.LoadArgument(OperandType.I64, 0); + + Operand entryAddress = context.Load(OperandType.I64, context.Add(nativeContextPtr, Const((long)NativeContext.GetCallAddressOffset()))); + Operand address = context.Load(OperandType.I64, entryAddress); + + // We need to find the missing function. If the function is HighCq, then it replaces this stub in the indirect table. + // Either way, we call it afterwards. + Operand functionAddr = context.Call(new _U64_U64_U64(NativeInterface.GetIndirectFunctionAddress), address, entryAddress); + + // Call and save the function. + EmitCall(context, functionAddr, tailCall); + + ControlFlowGraph cfg = context.GetControlFlowGraph(); + + OperandType[] argTypes = new OperandType[] + { + OperandType.I64 + }; + + return Compiler.Compile<GuestFunction>( + cfg, + argTypes, + OperandType.I64, + CompilerOptions.HighCq); + } + } +} diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs index a125a715..a11d25a6 100644 --- a/ARMeilleure/Translation/EmitterContext.cs +++ b/ARMeilleure/Translation/EmitterContext.cs @@ -143,9 +143,22 @@ namespace ARMeilleure.Translation } } - public Operand CompareAndSwap128(Operand address, Operand expected, Operand desired) + public void Tailcall(Operand address, params Operand[] callArgs) { - return Add(Instruction.CompareAndSwap128, Local(OperandType.V128), address, expected, desired); + Operand[] args = new Operand[callArgs.Length + 1]; + + args[0] = address; + + Array.Copy(callArgs, 0, args, 1, callArgs.Length); + + Add(Instruction.Tailcall, null, args); + + _needsNewBlock = true; + } + + public Operand CompareAndSwap(Operand address, Operand expected, Operand desired) + { + return Add(Instruction.CompareAndSwap, Local(desired.Type), address, expected, desired); } public Operand ConditionalSelect(Operand op1, Operand op2, Operand op3) diff --git a/ARMeilleure/Translation/JitCache.cs b/ARMeilleure/Translation/JitCache.cs index 73f04a96..b004cc22 100644 --- a/ARMeilleure/Translation/JitCache.cs +++ b/ARMeilleure/Translation/JitCache.cs @@ -13,9 +13,11 @@ namespace ARMeilleure.Translation private const int CodeAlignment = 4; // Bytes - private const int CacheSize = 512 * 1024 * 1024; + private const int CacheSize = 2047 * 1024 * 1024; - private static IntPtr _basePointer; + private static ReservedRegion _jitRegion; + + private static IntPtr _basePointer => _jitRegion.Pointer; private static int _offset; @@ -25,10 +27,11 @@ namespace ARMeilleure.Translation static JitCache() { - _basePointer = MemoryManagement.Allocate(CacheSize); + _jitRegion = new ReservedRegion(CacheSize); if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { + _jitRegion.ExpandIfNeeded(PageSize); JitUnwindWindows.InstallFunctionTableHandler(_basePointer, CacheSize); // The first page is used for the table based SEH structs. @@ -97,6 +100,8 @@ namespace ARMeilleure.Translation _offset += codeSize; + _jitRegion.ExpandIfNeeded((ulong)_offset); + if ((ulong)(uint)_offset > CacheSize) { throw new OutOfMemoryException(); diff --git a/ARMeilleure/Translation/JumpTable.cs b/ARMeilleure/Translation/JumpTable.cs new file mode 100644 index 00000000..5cad2944 --- /dev/null +++ b/ARMeilleure/Translation/JumpTable.cs @@ -0,0 +1,149 @@ +using ARMeilleure.Memory; +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Threading; + +namespace ARMeilleure.Translation +{ + class JumpTable + { + public static JumpTable Instance { get; } + + static JumpTable() + { + Instance = new JumpTable(); + } + + // The jump table is a block of (guestAddress, hostAddress) function mappings. + // Each entry corresponds to one branch in a JIT compiled function. The entries are + // reserved specifically for each call. + // The _dependants dictionary can be used to update the hostAddress for any functions that change. + + public const int JumpTableStride = 16; // 8 byte guest address, 8 byte host address + + private const int JumpTableSize = 1048576; + + private const int JumpTableByteSize = JumpTableSize * JumpTableStride; + + // The dynamic table is also a block of (guestAddress, hostAddress) function mappings. + // The main difference is that indirect calls and jumps reserve _multiple_ entries on the table. + // These start out as all 0. When an indirect call is made, it tries to find the guest address on the table. + + // If we get to an empty address, the guestAddress is set to the call that we want. + + // If we get to a guestAddress that matches our own (or we just claimed it), the hostAddress is read. + // If it is non-zero, we immediately branch or call the host function. + // If it is 0, NativeInterface is called to find the rejited address of the call. + // If none is found, the hostAddress entry stays at 0. Otherwise, the new address is placed in the entry. + + // If the table size is exhausted and we didn't find our desired address, we fall back to requesting + // the function from the JIT. + + private const int DynamicTableSize = 1048576; + + public const int DynamicTableElems = 1; + + public const int DynamicTableStride = DynamicTableElems * JumpTableStride; + + private const int DynamicTableByteSize = DynamicTableSize * JumpTableStride * DynamicTableElems; + + private int _tableEnd = 0; + private int _dynTableEnd = 0; + + private ConcurrentDictionary<ulong, TranslatedFunction> _targets; + private ConcurrentDictionary<ulong, LinkedList<int>> _dependants; // TODO: Attach to TranslatedFunction or a wrapper class. + + private ReservedRegion _jumpRegion; + private ReservedRegion _dynamicRegion; + public IntPtr JumpPointer => _jumpRegion.Pointer; + public IntPtr DynamicPointer => _dynamicRegion.Pointer; + + public JumpTable() + { + _jumpRegion = new ReservedRegion(JumpTableByteSize); + _dynamicRegion = new ReservedRegion(DynamicTableByteSize); + + _targets = new ConcurrentDictionary<ulong, TranslatedFunction>(); + _dependants = new ConcurrentDictionary<ulong, LinkedList<int>>(); + } + + public void RegisterFunction(ulong address, TranslatedFunction func) { + address &= ~3UL; + _targets.AddOrUpdate(address, func, (key, oldFunc) => func); + long funcPtr = func.GetPointer().ToInt64(); + + // Update all jump table entries that target this address. + LinkedList<int> myDependants; + if (_dependants.TryGetValue(address, out myDependants)) + { + lock (myDependants) + { + foreach (var entry in myDependants) + { + IntPtr addr = _jumpRegion.Pointer + entry * JumpTableStride; + Marshal.WriteInt64(addr, 8, funcPtr); + } + } + } + } + + public int ReserveDynamicEntry(bool isJump) + { + int entry = Interlocked.Increment(ref _dynTableEnd); + if (entry >= DynamicTableSize) + { + throw new OutOfMemoryException("JIT Dynamic Jump Table exhausted."); + } + + _dynamicRegion.ExpandIfNeeded((ulong)((entry + 1) * DynamicTableStride)); + + // Initialize all host function pointers to the indirect call stub. + + IntPtr addr = _dynamicRegion.Pointer + entry * DynamicTableStride; + long stubPtr = (long)DirectCallStubs.IndirectCallStub(isJump); + + for (int i = 0; i < DynamicTableElems; i++) + { + Marshal.WriteInt64(addr, i * JumpTableStride + 8, stubPtr); + } + + return entry; + } + + public int ReserveTableEntry(long ownerAddress, long address, bool isJump) + { + int entry = Interlocked.Increment(ref _tableEnd); + if (entry >= JumpTableSize) + { + throw new OutOfMemoryException("JIT Direct Jump Table exhausted."); + } + + _jumpRegion.ExpandIfNeeded((ulong)((entry + 1) * JumpTableStride)); + + // Is the address we have already registered? If so, put the function address in the jump table. + // If not, it will point to the direct call stub. + long value = (long)DirectCallStubs.DirectCallStub(isJump); + TranslatedFunction func; + if (_targets.TryGetValue((ulong)address, out func)) + { + value = func.GetPointer().ToInt64(); + } + + // Make sure changes to the function at the target address update this jump table entry. + LinkedList<int> targetDependants = _dependants.GetOrAdd((ulong)address, (addr) => new LinkedList<int>()); + lock (targetDependants) + { + targetDependants.AddLast(entry); + } + + IntPtr addr = _jumpRegion.Pointer + entry * JumpTableStride; + + Marshal.WriteInt64(addr, 0, address); + Marshal.WriteInt64(addr, 8, value); + + return entry; + } + } +} diff --git a/ARMeilleure/Translation/TranslatedFunction.cs b/ARMeilleure/Translation/TranslatedFunction.cs index 06069cf8..af01aaab 100644 --- a/ARMeilleure/Translation/TranslatedFunction.cs +++ b/ARMeilleure/Translation/TranslatedFunction.cs @@ -1,3 +1,5 @@ +using System; +using System.Runtime.InteropServices; using System.Threading; namespace ARMeilleure.Translation @@ -11,6 +13,8 @@ namespace ARMeilleure.Translation private bool _rejit; private int _callCount; + public bool HighCq => !_rejit; + public TranslatedFunction(GuestFunction func, bool rejit) { _func = func; @@ -26,5 +30,10 @@ namespace ARMeilleure.Translation { return _rejit && Interlocked.Increment(ref _callCount) == MinCallsForRejit; } + + public IntPtr GetPointer() + { + return Marshal.GetFunctionPointerForDelegate(_func); + } } }
\ No newline at end of file diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs index 3008303e..9d534d58 100644 --- a/ARMeilleure/Translation/Translator.cs +++ b/ARMeilleure/Translation/Translator.cs @@ -16,10 +16,14 @@ namespace ARMeilleure.Translation { private const ulong CallFlag = InstEmitFlowHelper.CallFlag; + private const bool AlwaysTranslateFunctions = true; // If false, only translates a single block for lowCq. + private MemoryManager _memory; private ConcurrentDictionary<ulong, TranslatedFunction> _funcs; + private JumpTable _jumpTable; + private PriorityQueue<RejitRequest> _backgroundQueue; private AutoResetEvent _backgroundTranslatorEvent; @@ -32,9 +36,13 @@ namespace ARMeilleure.Translation _funcs = new ConcurrentDictionary<ulong, TranslatedFunction>(); + _jumpTable = JumpTable.Instance; + _backgroundQueue = new PriorityQueue<RejitRequest>(2); _backgroundTranslatorEvent = new AutoResetEvent(false); + + DirectCallStubs.InitializeStubs(); } private void TranslateQueuedSubs() @@ -46,30 +54,42 @@ namespace ARMeilleure.Translation TranslatedFunction func = Translate(request.Address, request.Mode, highCq: true); _funcs.AddOrUpdate(request.Address, func, (key, oldFunc) => func); + _jumpTable.RegisterFunction(request.Address, func); } else { _backgroundTranslatorEvent.WaitOne(); } } + _backgroundTranslatorEvent.Set(); // Wake up any other background translator threads, to encourage them to exit. } public void Execute(State.ExecutionContext context, ulong address) { if (Interlocked.Increment(ref _threadCount) == 1) { - Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs) + // Simple heuristic, should be user configurable in future. (1 for 4 core/ht or less, 2 for 6 core+ht etc). + // All threads are normal priority except from the last, which just fills as much of the last core as the os lets it with a low priority. + // If we only have one rejit thread, it should be normal priority as highCq code is performance critical. + // TODO: Use physical cores rather than logical. This only really makes sense for processors with hyperthreading. Requires OS specific code. + int unboundedThreadCount = Math.Max(1, (Environment.ProcessorCount - 6) / 3); + int threadCount = Math.Min(3, unboundedThreadCount); + for (int i = 0; i < threadCount; i++) { - Name = "CPU.BackgroundTranslatorThread", - Priority = ThreadPriority.Lowest - }; + bool last = i != 0 && i == unboundedThreadCount - 1; + Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs) + { + Name = "CPU.BackgroundTranslatorThread." + i, + Priority = last ? ThreadPriority.Lowest : ThreadPriority.Normal + }; - backgroundTranslatorThread.Start(); + backgroundTranslatorThread.Start(); + } } Statistics.InitializeTimer(); - NativeInterface.RegisterThread(context, _memory); + NativeInterface.RegisterThread(context, _memory, this); do { @@ -98,7 +118,7 @@ namespace ARMeilleure.Translation return nextAddr; } - private TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode) + internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode) { // TODO: Investigate how we should handle code at unaligned addresses. // Currently, those low bits are used to store special flags. @@ -124,12 +144,12 @@ namespace ARMeilleure.Translation private TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq) { - ArmEmitterContext context = new ArmEmitterContext(_memory, Aarch32Mode.User); + ArmEmitterContext context = new ArmEmitterContext(_memory, _jumpTable, (long)address, highCq, Aarch32Mode.User); Logger.StartPass(PassName.Decoding); - Block[] blocks = highCq - ? Decoder.DecodeFunction (_memory, address, mode) + Block[] blocks = AlwaysTranslateFunctions + ? Decoder.DecodeFunction (_memory, address, mode, highCq) : Decoder.DecodeBasicBlock(_memory, address, mode); Logger.EndPass(PassName.Decoding); @@ -216,7 +236,7 @@ namespace ARMeilleure.Translation // with some kind of branch). if (isLastOp && block.Next == null) { - context.Return(Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes)); + InstEmitFlowHelper.EmitTailContinue(context, Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes)); } } } @@ -238,7 +258,11 @@ namespace ARMeilleure.Translation context.BranchIfTrue(lblNonZero, count); - context.Call(new _Void(NativeInterface.CheckSynchronization)); + Operand running = context.Call(new _Bool(NativeInterface.CheckSynchronization)); + + context.BranchIfTrue(lblExit, running); + + context.Return(Const(0L)); context.Branch(lblExit); |
