aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure/Translation
diff options
context:
space:
mode:
Diffstat (limited to 'ARMeilleure/Translation')
-rw-r--r--ARMeilleure/Translation/ArmEmitterContext.cs15
-rw-r--r--ARMeilleure/Translation/DirectCallStubs.cs131
-rw-r--r--ARMeilleure/Translation/EmitterContext.cs17
-rw-r--r--ARMeilleure/Translation/JitCache.cs11
-rw-r--r--ARMeilleure/Translation/JumpTable.cs149
-rw-r--r--ARMeilleure/Translation/TranslatedFunction.cs9
-rw-r--r--ARMeilleure/Translation/Translator.cs48
7 files changed, 360 insertions, 20 deletions
diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs
index d35e985e..d1a2c92d 100644
--- a/ARMeilleure/Translation/ArmEmitterContext.cs
+++ b/ARMeilleure/Translation/ArmEmitterContext.cs
@@ -41,10 +41,19 @@ namespace ARMeilleure.Translation
public Aarch32Mode Mode { get; }
- public ArmEmitterContext(MemoryManager memory, Aarch32Mode mode)
+ public JumpTable JumpTable { get; }
+
+ public long BaseAddress { get; }
+
+ public bool HighCq { get; }
+
+ public ArmEmitterContext(MemoryManager memory, JumpTable jumpTable, long baseAddress, bool highCq, Aarch32Mode mode)
{
- Memory = memory;
- Mode = mode;
+ Memory = memory;
+ JumpTable = jumpTable;
+ BaseAddress = baseAddress;
+ HighCq = highCq;
+ Mode = mode;
_labels = new Dictionary<ulong, Operand>();
}
diff --git a/ARMeilleure/Translation/DirectCallStubs.cs b/ARMeilleure/Translation/DirectCallStubs.cs
new file mode 100644
index 00000000..e6e87b2b
--- /dev/null
+++ b/ARMeilleure/Translation/DirectCallStubs.cs
@@ -0,0 +1,131 @@
+using ARMeilleure.Instructions;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using System;
+using System.Runtime.InteropServices;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+ static class DirectCallStubs
+ {
+ private delegate long GuestFunction(IntPtr nativeContextPtr);
+
+ private static GuestFunction _directCallStub;
+ private static GuestFunction _directTailCallStub;
+ private static GuestFunction _indirectCallStub;
+ private static GuestFunction _indirectTailCallStub;
+
+        // Guards the one-time stub generation done by InitializeStubs.
+        private static readonly object _lock;
+
+        // Volatile because InitializeStubs reads this flag without taking the lock: the read must not be
+        // reordered with later reads of the stub delegates, which are assigned before the flag is set.
+        private static volatile bool _initialized;
+
+        static DirectCallStubs()
+        {
+            _lock = new object();
+        }
+
+        /// <summary>
+        /// Generates the four call stubs (direct/indirect, regular call/tail call) the first time it is called.
+        /// Later calls return immediately. Concurrent callers are serialized on a private lock, so the
+        /// stubs are generated only once.
+        /// </summary>
+        public static void InitializeStubs()
+        {
+            // Fast path: nothing to do once the stubs have been published.
+            if (_initialized)
+            {
+                return;
+            }
+
+            lock (_lock)
+            {
+                // Check again under the lock, another thread may have finished while this one waited.
+                if (_initialized)
+                {
+                    return;
+                }
+
+                _directCallStub = GenerateDirectCallStub(false);
+                _directTailCallStub = GenerateDirectCallStub(true);
+                _indirectCallStub = GenerateIndirectCallStub(false);
+                _indirectTailCallStub = GenerateIndirectCallStub(true);
+
+                // Set last, so the flag only becomes visible after all four stubs are assigned.
+                _initialized = true;
+            }
+        }
+
+        /// <summary>
+        /// Gets a native function pointer for the direct call stub.
+        /// </summary>
+        /// <param name="tailCall">True to get the tail call variant of the stub, false for the regular call variant</param>
+        /// <returns>Function pointer obtained from the selected stub delegate</returns>
+        public static IntPtr DirectCallStub(bool tailCall)
+        {
+            GuestFunction stub = _directCallStub;
+
+            if (tailCall)
+            {
+                stub = _directTailCallStub;
+            }
+
+            return Marshal.GetFunctionPointerForDelegate(stub);
+        }
+
+        /// <summary>
+        /// Gets a native function pointer for the indirect call stub.
+        /// </summary>
+        /// <param name="tailCall">True to get the tail call variant of the stub, false for the regular call variant</param>
+        /// <returns>Function pointer obtained from the selected stub delegate</returns>
+        public static IntPtr IndirectCallStub(bool tailCall)
+        {
+            GuestFunction stub;
+
+            if (tailCall)
+            {
+                stub = _indirectTailCallStub;
+            }
+            else
+            {
+                stub = _indirectCallStub;
+            }
+
+            return Marshal.GetFunctionPointerForDelegate(stub);
+        }
+
+        /// <summary>
+        /// Emits the transfer of control to <paramref name="address"/>, passing along argument 0 of the
+        /// function being emitted as the only call argument.
+        /// </summary>
+        /// <param name="context">Emitter context the instructions are added to</param>
+        /// <param name="address">Address of the function to transfer control to</param>
+        /// <param name="tailCall">True to emit a tail call, false to emit a call followed by a return of its result</param>
+        private static void EmitCall(EmitterContext context, Operand address, bool tailCall)
+        {
+            // Both paths load the argument before emitting the call itself.
+            Operand firstArgument = context.LoadArgument(OperandType.I64, 0);
+
+            if (!tailCall)
+            {
+                Operand result = context.Call(address, OperandType.I64, firstArgument);
+
+                context.Return(result);
+
+                return;
+            }
+
+            context.Tailcall(address, firstArgument);
+        }
+
+        /// <summary>
+        /// Builds the stub that direct call jump table entries point to while the host address of their target is not known yet.
+        /// The stub takes a NativeContext pointer like a translated guest function does, reads the target guest address from it,
+        /// asks NativeInterface for the matching host function and transfers control to that function.
+        /// Once the target is compiled in highCq, the translator repoints the table entries from this stub to that function.
+        /// </summary>
+        /// <param name="tailCall">True to leave the stub with a tail call, false to call the target and return its result</param>
+        /// <returns>The compiled stub</returns>
+        private static GuestFunction GenerateDirectCallStub(bool tailCall)
+        {
+            EmitterContext context = new EmitterContext();
+
+            Operand contextPtr = context.LoadArgument(OperandType.I64, 0);
+
+            // The guest address to call is read from the NativeContext at the call address offset.
+            Operand callAddressOffset = Const((long)NativeContext.GetCallAddressOffset());
+            Operand callAddressPtr = context.Add(contextPtr, callAddressOffset);
+            Operand guestAddress = context.Load(OperandType.I64, callAddressPtr);
+
+            // Set call flag.
+            Operand flaggedAddress = context.BitwiseOr(guestAddress, Const(guestAddress.Type, 1));
+
+            Operand hostAddress = context.Call(new _U64_U64(NativeInterface.GetFunctionAddress), flaggedAddress);
+
+            EmitCall(context, hostAddress, tailCall);
+
+            // The stub has the same signature as a guest function: one I64 argument, I64 result.
+            return Compiler.Compile<GuestFunction>(
+                context.GetControlFlowGraph(),
+                new OperandType[] { OperandType.I64 },
+                OperandType.I64,
+                CompilerOptions.HighCq);
+        }
+
+        /// <summary>
+        /// Builds the stub that (already claimed) indirect call entries point to while their host address is not known yet.
+        /// The stub takes a NativeContext pointer like a translated guest function does, reads the address of the indirect
+        /// table entry from it, loads the address stored at the start of that entry and asks NativeInterface for the
+        /// matching host function, passing the entry address along.
+        /// If the function found is highCq, the entry in the table is updated to point to it rather than to this stub.
+        /// </summary>
+        /// <param name="tailCall">True to leave the stub with a tail call, false to call the target and return its result</param>
+        /// <returns>The compiled stub</returns>
+        private static GuestFunction GenerateIndirectCallStub(bool tailCall)
+        {
+            EmitterContext context = new EmitterContext();
+
+            Operand contextPtr = context.LoadArgument(OperandType.I64, 0);
+
+            // The NativeContext holds the address of the table entry, and the entry starts with the address to look up.
+            Operand callAddressOffset = Const((long)NativeContext.GetCallAddressOffset());
+            Operand entryPtr = context.Load(OperandType.I64, context.Add(contextPtr, callAddressOffset));
+            Operand guestAddress = context.Load(OperandType.I64, entryPtr);
+
+            // Find the missing function. A HighCq function replaces this stub in the indirect table.
+            // Whatever is found gets called afterwards.
+            Operand hostAddress = context.Call(
+                new _U64_U64_U64(NativeInterface.GetIndirectFunctionAddress),
+                guestAddress,
+                entryPtr);
+
+            EmitCall(context, hostAddress, tailCall);
+
+            // The stub has the same signature as a guest function: one I64 argument, I64 result.
+            return Compiler.Compile<GuestFunction>(
+                context.GetControlFlowGraph(),
+                new OperandType[] { OperandType.I64 },
+                OperandType.I64,
+                CompilerOptions.HighCq);
+        }
+ }
+}
diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs
index a125a715..a11d25a6 100644
--- a/ARMeilleure/Translation/EmitterContext.cs
+++ b/ARMeilleure/Translation/EmitterContext.cs
@@ -143,9 +143,22 @@ namespace ARMeilleure.Translation
}
}
- public Operand CompareAndSwap128(Operand address, Operand expected, Operand desired)
+ public void Tailcall(Operand address, params Operand[] callArgs)
{
- return Add(Instruction.CompareAndSwap128, Local(OperandType.V128), address, expected, desired);
+ Operand[] args = new Operand[callArgs.Length + 1];
+
+ args[0] = address;
+
+ Array.Copy(callArgs, 0, args, 1, callArgs.Length);
+
+ Add(Instruction.Tailcall, null, args);
+
+ _needsNewBlock = true;
+ }
+
+ public Operand CompareAndSwap(Operand address, Operand expected, Operand desired)
+ {
+ return Add(Instruction.CompareAndSwap, Local(desired.Type), address, expected, desired);
}
public Operand ConditionalSelect(Operand op1, Operand op2, Operand op3)
diff --git a/ARMeilleure/Translation/JitCache.cs b/ARMeilleure/Translation/JitCache.cs
index 73f04a96..b004cc22 100644
--- a/ARMeilleure/Translation/JitCache.cs
+++ b/ARMeilleure/Translation/JitCache.cs
@@ -13,9 +13,11 @@ namespace ARMeilleure.Translation
private const int CodeAlignment = 4; // Bytes
- private const int CacheSize = 512 * 1024 * 1024;
+ private const int CacheSize = 2047 * 1024 * 1024;
- private static IntPtr _basePointer;
+ private static ReservedRegion _jitRegion;
+
+ private static IntPtr _basePointer => _jitRegion.Pointer;
private static int _offset;
@@ -25,10 +27,11 @@ namespace ARMeilleure.Translation
static JitCache()
{
- _basePointer = MemoryManagement.Allocate(CacheSize);
+ _jitRegion = new ReservedRegion(CacheSize);
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
+ _jitRegion.ExpandIfNeeded(PageSize);
JitUnwindWindows.InstallFunctionTableHandler(_basePointer, CacheSize);
// The first page is used for the table based SEH structs.
@@ -97,6 +100,8 @@ namespace ARMeilleure.Translation
_offset += codeSize;
+ _jitRegion.ExpandIfNeeded((ulong)_offset);
+
if ((ulong)(uint)_offset > CacheSize)
{
throw new OutOfMemoryException();
diff --git a/ARMeilleure/Translation/JumpTable.cs b/ARMeilleure/Translation/JumpTable.cs
new file mode 100644
index 00000000..5cad2944
--- /dev/null
+++ b/ARMeilleure/Translation/JumpTable.cs
@@ -0,0 +1,149 @@
+using ARMeilleure.Memory;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Threading;
+
+namespace ARMeilleure.Translation
+{
+ class JumpTable
+ {
+ public static JumpTable Instance { get; }
+
+ // Creates the shared table that is exposed through the Instance property.
+ static JumpTable()
+ {
+ Instance = new JumpTable();
+ }
+
+ // The jump table is a block of (guestAddress, hostAddress) function mappings.
+ // Each entry corresponds to one branch in a JIT compiled function. The entries are
+ // reserved specifically for each call.
+ // The _dependants dictionary can be used to update the hostAddress for any functions that change.
+
+ public const int JumpTableStride = 16; // 8 byte guest address, 8 byte host address
+
+ private const int JumpTableSize = 1048576;
+
+ private const int JumpTableByteSize = JumpTableSize * JumpTableStride;
+
+ // The dynamic table is also a block of (guestAddress, hostAddress) function mappings.
+ // The main difference is that indirect calls and jumps reserve _multiple_ entries on the table.
+ // These start out as all 0. When an indirect call is made, it tries to find the guest address on the table.
+
+ // If we get to an empty address, the guestAddress is set to the call that we want.
+
+ // If we get to a guestAddress that matches our own (or we just claimed it), the hostAddress is read.
+ // If it is non-zero, we immediately branch or call the host function.
+ // If it is 0, NativeInterface is called to find the rejited address of the call.
+ // If none is found, the hostAddress entry stays at 0. Otherwise, the new address is placed in the entry.
+
+ // If the table size is exhausted and we didn't find our desired address, we fall back to requesting
+ // the function from the JIT.
+
+ private const int DynamicTableSize = 1048576;
+
+ public const int DynamicTableElems = 1;
+
+ public const int DynamicTableStride = DynamicTableElems * JumpTableStride;
+
+ private const int DynamicTableByteSize = DynamicTableSize * JumpTableStride * DynamicTableElems;
+
+ private int _tableEnd = 0;
+ private int _dynTableEnd = 0;
+
+ private ConcurrentDictionary<ulong, TranslatedFunction> _targets;
+ private ConcurrentDictionary<ulong, LinkedList<int>> _dependants; // TODO: Attach to TranslatedFunction or a wrapper class.
+
+ private ReservedRegion _jumpRegion;
+ private ReservedRegion _dynamicRegion;
+ public IntPtr JumpPointer => _jumpRegion.Pointer;
+ public IntPtr DynamicPointer => _dynamicRegion.Pointer;
+
+ /// <summary>
+ /// Creates the regions backing the direct jump table and the dynamic table,
+ /// along with the initially empty target and dependant maps.
+ /// </summary>
+ public JumpTable()
+ {
+ _jumpRegion = new ReservedRegion(JumpTableByteSize);
+ _dynamicRegion = new ReservedRegion(DynamicTableByteSize);
+
+ _targets = new ConcurrentDictionary<ulong, TranslatedFunction>();
+ _dependants = new ConcurrentDictionary<ulong, LinkedList<int>>();
+ }
+
+        /// <summary>
+        /// Records <paramref name="func"/> as the translation for a guest address, and repoints every
+        /// direct jump table entry that was reserved for that address to the new function.
+        /// </summary>
+        /// <param name="address">Guest address of the function, the two low bits are cleared before it is used as key</param>
+        /// <param name="func">Translated function that now serves the address</param>
+        public void RegisterFunction(ulong address, TranslatedFunction func)
+        {
+            ulong key = address & ~3UL;
+
+            _targets.AddOrUpdate(key, func, (k, previous) => func);
+
+            long hostPointer = func.GetPointer().ToInt64();
+
+            LinkedList<int> entries;
+
+            if (!_dependants.TryGetValue(key, out entries))
+            {
+                // No jump table entry targets this address, nothing to patch.
+                return;
+            }
+
+            // The list is shared with ReserveTableEntry, which adds to it under the same lock.
+            lock (entries)
+            {
+                foreach (int index in entries)
+                {
+                    IntPtr entryPtr = _jumpRegion.Pointer + index * JumpTableStride;
+
+                    // The host address lives in the second 8 bytes of the entry.
+                    Marshal.WriteInt64(entryPtr, 8, hostPointer);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Reserves the next entry of the dynamic table, and points the host address of each of its
+        /// elements to the indirect call stub.
+        /// </summary>
+        /// <param name="isJump">True to use the tail call variant of the indirect call stub</param>
+        /// <returns>Index of the reserved entry</returns>
+        /// <exception cref="OutOfMemoryException">Thrown when the dynamic table has no entries left</exception>
+        public int ReserveDynamicEntry(bool isJump)
+        {
+            int index = Interlocked.Increment(ref _dynTableEnd);
+
+            if (index >= DynamicTableSize)
+            {
+                throw new OutOfMemoryException("JIT Dynamic Jump Table exhausted.");
+            }
+
+            int entryOffset = index * DynamicTableStride;
+
+            // Make sure the region covers the table up to the end of the new entry.
+            _dynamicRegion.ExpandIfNeeded((ulong)(entryOffset + DynamicTableStride));
+
+            IntPtr entryPtr = _dynamicRegion.Pointer + entryOffset;
+            long stubPtr = DirectCallStubs.IndirectCallStub(isJump).ToInt64();
+
+            // The host address is the second 8 bytes of each (guestAddress, hostAddress) element.
+            int hostAddressOffset = 8;
+
+            for (int elem = 0; elem < DynamicTableElems; elem++)
+            {
+                Marshal.WriteInt64(entryPtr, hostAddressOffset, stubPtr);
+
+                hostAddressOffset += JumpTableStride;
+            }
+
+            return index;
+        }
+
+        /// <summary>
+        /// Reserves the next entry of the direct jump table for a call or jump to <paramref name="address"/>.
+        /// The host address of the entry is the function registered for the target when there is one, and the
+        /// direct call stub otherwise. The entry is also recorded as a dependant of the target, so that
+        /// RegisterFunction can repoint it when the function at the target address changes.
+        /// </summary>
+        /// <param name="ownerAddress">Guest address of the function that owns the entry (not used by this method)</param>
+        /// <param name="address">Guest address of the call or jump target</param>
+        /// <param name="isJump">True to use the tail call variant of the direct call stub</param>
+        /// <returns>Index of the reserved entry</returns>
+        /// <exception cref="OutOfMemoryException">Thrown when the direct jump table has no entries left</exception>
+        public int ReserveTableEntry(long ownerAddress, long address, bool isJump)
+        {
+            int entry = Interlocked.Increment(ref _tableEnd);
+            if (entry >= JumpTableSize)
+            {
+                throw new OutOfMemoryException("JIT Direct Jump Table exhausted.");
+            }
+
+            _jumpRegion.ExpandIfNeeded((ulong)((entry + 1) * JumpTableStride));
+
+            IntPtr entryPtr = _jumpRegion.Pointer + entry * JumpTableStride;
+            long stubPtr = (long)DirectCallStubs.DirectCallStub(isJump);
+
+            // Make sure changes to the function at the target address update this jump table entry.
+            LinkedList<int> targetDependants = _dependants.GetOrAdd((ulong)address, (key) => new LinkedList<int>());
+
+            // The target lookup, the entry write and the dependant registration all happen under the lock that
+            // RegisterFunction holds while it patches entries. RegisterFunction updates _targets before it takes
+            // the lock, so either it sees this entry on the list and patches it, or the lookup below already sees
+            // the new function. Doing the lookup or the write outside of the lock would allow a concurrent
+            // RegisterFunction to miss the entry, or to have its patched pointer overwritten with a stale value.
+            lock (targetDependants)
+            {
+                // Is the address we have already registered? If so, put the function address in the jump table.
+                // If not, it will point to the direct call stub.
+                long value = stubPtr;
+                TranslatedFunction func;
+                if (_targets.TryGetValue((ulong)address, out func))
+                {
+                    value = func.GetPointer().ToInt64();
+                }
+
+                Marshal.WriteInt64(entryPtr, 0, address);
+                Marshal.WriteInt64(entryPtr, 8, value);
+
+                targetDependants.AddLast(entry);
+            }
+
+            return entry;
+        }
+ }
+}
diff --git a/ARMeilleure/Translation/TranslatedFunction.cs b/ARMeilleure/Translation/TranslatedFunction.cs
index 06069cf8..af01aaab 100644
--- a/ARMeilleure/Translation/TranslatedFunction.cs
+++ b/ARMeilleure/Translation/TranslatedFunction.cs
@@ -1,3 +1,5 @@
+using System;
+using System.Runtime.InteropServices;
using System.Threading;
namespace ARMeilleure.Translation
@@ -11,6 +13,8 @@ namespace ARMeilleure.Translation
private bool _rejit;
private int _callCount;
+ public bool HighCq => !_rejit;
+
public TranslatedFunction(GuestFunction func, bool rejit)
{
_func = func;
@@ -26,5 +30,10 @@ namespace ARMeilleure.Translation
{
return _rejit && Interlocked.Increment(ref _callCount) == MinCallsForRejit;
}
+
+ public IntPtr GetPointer()
+ {
+ return Marshal.GetFunctionPointerForDelegate(_func);
+ }
}
} \ No newline at end of file
diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs
index 3008303e..9d534d58 100644
--- a/ARMeilleure/Translation/Translator.cs
+++ b/ARMeilleure/Translation/Translator.cs
@@ -16,10 +16,14 @@ namespace ARMeilleure.Translation
{
private const ulong CallFlag = InstEmitFlowHelper.CallFlag;
+ private const bool AlwaysTranslateFunctions = true; // If false, only translates a single block for lowCq.
+
private MemoryManager _memory;
private ConcurrentDictionary<ulong, TranslatedFunction> _funcs;
+ private JumpTable _jumpTable;
+
private PriorityQueue<RejitRequest> _backgroundQueue;
private AutoResetEvent _backgroundTranslatorEvent;
@@ -32,9 +36,13 @@ namespace ARMeilleure.Translation
_funcs = new ConcurrentDictionary<ulong, TranslatedFunction>();
+ _jumpTable = JumpTable.Instance;
+
_backgroundQueue = new PriorityQueue<RejitRequest>(2);
_backgroundTranslatorEvent = new AutoResetEvent(false);
+
+ DirectCallStubs.InitializeStubs();
}
private void TranslateQueuedSubs()
@@ -46,30 +54,42 @@ namespace ARMeilleure.Translation
TranslatedFunction func = Translate(request.Address, request.Mode, highCq: true);
_funcs.AddOrUpdate(request.Address, func, (key, oldFunc) => func);
+ _jumpTable.RegisterFunction(request.Address, func);
}
else
{
_backgroundTranslatorEvent.WaitOne();
}
}
+ _backgroundTranslatorEvent.Set(); // Wake up any other background translator threads, to encourage them to exit.
}
public void Execute(State.ExecutionContext context, ulong address)
{
if (Interlocked.Increment(ref _threadCount) == 1)
{
- Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs)
+ // Simple heuristic, should be user configurable in future. (1 for 4 core/ht or less, 2 for 6 core+ht etc).
+ // All threads are normal priority except for the last, which runs at low priority and just fills as much of the last core as the OS lets it.
+ // If we only have one rejit thread, it should be normal priority as highCq code is performance critical.
+ // TODO: Use physical cores rather than logical. This only really makes sense for processors with hyperthreading. Requires OS specific code.
+ int unboundedThreadCount = Math.Max(1, (Environment.ProcessorCount - 6) / 3);
+ int threadCount = Math.Min(3, unboundedThreadCount);
+ for (int i = 0; i < threadCount; i++)
{
- Name = "CPU.BackgroundTranslatorThread",
- Priority = ThreadPriority.Lowest
- };
+ bool last = i != 0 && i == unboundedThreadCount - 1;
+ Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs)
+ {
+ Name = "CPU.BackgroundTranslatorThread." + i,
+ Priority = last ? ThreadPriority.Lowest : ThreadPriority.Normal
+ };
- backgroundTranslatorThread.Start();
+ backgroundTranslatorThread.Start();
+ }
}
Statistics.InitializeTimer();
- NativeInterface.RegisterThread(context, _memory);
+ NativeInterface.RegisterThread(context, _memory, this);
do
{
@@ -98,7 +118,7 @@ namespace ARMeilleure.Translation
return nextAddr;
}
- private TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
+ internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
{
// TODO: Investigate how we should handle code at unaligned addresses.
// Currently, those low bits are used to store special flags.
@@ -124,12 +144,12 @@ namespace ARMeilleure.Translation
private TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq)
{
- ArmEmitterContext context = new ArmEmitterContext(_memory, Aarch32Mode.User);
+ ArmEmitterContext context = new ArmEmitterContext(_memory, _jumpTable, (long)address, highCq, Aarch32Mode.User);
Logger.StartPass(PassName.Decoding);
- Block[] blocks = highCq
- ? Decoder.DecodeFunction (_memory, address, mode)
+ Block[] blocks = AlwaysTranslateFunctions
+ ? Decoder.DecodeFunction (_memory, address, mode, highCq)
: Decoder.DecodeBasicBlock(_memory, address, mode);
Logger.EndPass(PassName.Decoding);
@@ -216,7 +236,7 @@ namespace ARMeilleure.Translation
// with some kind of branch).
if (isLastOp && block.Next == null)
{
- context.Return(Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes));
+ InstEmitFlowHelper.EmitTailContinue(context, Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes));
}
}
}
@@ -238,7 +258,11 @@ namespace ARMeilleure.Translation
context.BranchIfTrue(lblNonZero, count);
- context.Call(new _Void(NativeInterface.CheckSynchronization));
+ Operand running = context.Call(new _Bool(NativeInterface.CheckSynchronization));
+
+ context.BranchIfTrue(lblExit, running);
+
+ context.Return(Const(0L));
context.Branch(lblExit);