aboutsummaryrefslogtreecommitdiff
path: root/src/ARMeilleure/Translation/Translator.cs
diff options
context:
space:
mode:
Diffstat (limited to 'src/ARMeilleure/Translation/Translator.cs')
-rw-r--r--src/ARMeilleure/Translation/Translator.cs576
1 files changed, 576 insertions, 0 deletions
diff --git a/src/ARMeilleure/Translation/Translator.cs b/src/ARMeilleure/Translation/Translator.cs
new file mode 100644
index 00000000..f349c5eb
--- /dev/null
+++ b/src/ARMeilleure/Translation/Translator.cs
@@ -0,0 +1,576 @@
+using ARMeilleure.CodeGen;
+using ARMeilleure.Common;
+using ARMeilleure.Decoders;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.Instructions;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.Signal;
+using ARMeilleure.State;
+using ARMeilleure.Translation.Cache;
+using ARMeilleure.Translation.PTC;
+using Ryujinx.Common;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+using System.Threading;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Translation
+{
+ public class Translator
+ {
+ private static readonly AddressTable<ulong>.Level[] Levels64Bit =
+ new AddressTable<ulong>.Level[]
+ {
+ new(31, 17),
+ new(23, 8),
+ new(15, 8),
+ new( 7, 8),
+ new( 2, 5)
+ };
+
+ private static readonly AddressTable<ulong>.Level[] Levels32Bit =
+ new AddressTable<ulong>.Level[]
+ {
+ new(31, 17),
+ new(23, 8),
+ new(15, 8),
+ new( 7, 8),
+ new( 1, 6)
+ };
+
+ private readonly IJitMemoryAllocator _allocator;
+ private readonly ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>> _oldFuncs;
+
+ private readonly Ptc _ptc;
+
+ internal TranslatorCache<TranslatedFunction> Functions { get; }
+ internal AddressTable<ulong> FunctionTable { get; }
+ internal EntryTable<uint> CountTable { get; }
+ internal TranslatorStubs Stubs { get; }
+ internal TranslatorQueue Queue { get; }
+ internal IMemoryManager Memory { get; }
+
+ private volatile int _threadCount;
+
+ // FIXME: Remove this once the init logic of the emulator will be redone.
+ public static readonly ManualResetEvent IsReadyForTranslation = new(false);
+
+ public Translator(IJitMemoryAllocator allocator, IMemoryManager memory, bool for64Bits)
+ {
+ _allocator = allocator;
+ Memory = memory;
+
+ _oldFuncs = new ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>>();
+
+ _ptc = new Ptc();
+
+ Queue = new TranslatorQueue();
+
+ JitCache.Initialize(allocator);
+
+ CountTable = new EntryTable<uint>();
+ Functions = new TranslatorCache<TranslatedFunction>();
+ FunctionTable = new AddressTable<ulong>(for64Bits ? Levels64Bit : Levels32Bit);
+ Stubs = new TranslatorStubs(this);
+
+ FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;
+
+ if (memory.Type.IsHostMapped())
+ {
+ NativeSignalHandler.InitializeSignalHandler(allocator.GetPageSize());
+ }
+ }
+
+ public IPtcLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled)
+ {
+ _ptc.Initialize(titleIdText, displayVersion, enabled, Memory.Type);
+ return _ptc;
+ }
+
+ public void PrepareCodeRange(ulong address, ulong size)
+ {
+ if (_ptc.Profiler.StaticCodeSize == 0)
+ {
+ _ptc.Profiler.StaticCodeStart = address;
+ _ptc.Profiler.StaticCodeSize = size;
+ }
+ }
+
+ public void Execute(State.ExecutionContext context, ulong address)
+ {
+ if (Interlocked.Increment(ref _threadCount) == 1)
+ {
+ IsReadyForTranslation.WaitOne();
+
+ if (_ptc.State == PtcState.Enabled)
+ {
+ Debug.Assert(Functions.Count == 0);
+ _ptc.LoadTranslations(this);
+ _ptc.MakeAndSaveTranslations(this);
+ }
+
+ _ptc.Profiler.Start();
+
+ _ptc.Disable();
+
+ // Simple heuristic, should be user configurable in future. (1 for 4 core/ht or less, 2 for 6 core + ht
+ // etc). All threads are normal priority except from the last, which just fills as much of the last core
+ // as the os lets it with a low priority. If we only have one rejit thread, it should be normal priority
+ // as highCq code is performance critical.
+ //
+ // TODO: Use physical cores rather than logical. This only really makes sense for processors with
+ // hyperthreading. Requires OS specific code.
+ int unboundedThreadCount = Math.Max(1, (Environment.ProcessorCount - 6) / 3);
+ int threadCount = Math.Min(4, unboundedThreadCount);
+
+ for (int i = 0; i < threadCount; i++)
+ {
+ bool last = i != 0 && i == unboundedThreadCount - 1;
+
+ Thread backgroundTranslatorThread = new Thread(BackgroundTranslate)
+ {
+ Name = "CPU.BackgroundTranslatorThread." + i,
+ Priority = last ? ThreadPriority.Lowest : ThreadPriority.Normal
+ };
+
+ backgroundTranslatorThread.Start();
+ }
+ }
+
+ Statistics.InitializeTimer();
+
+ NativeInterface.RegisterThread(context, Memory, this);
+
+ if (Optimizations.UseUnmanagedDispatchLoop)
+ {
+ Stubs.DispatchLoop(context.NativeContextPtr, address);
+ }
+ else
+ {
+ do
+ {
+ address = ExecuteSingle(context, address);
+ }
+ while (context.Running && address != 0);
+ }
+
+ NativeInterface.UnregisterThread();
+
+ if (Interlocked.Decrement(ref _threadCount) == 0)
+ {
+ ClearJitCache();
+
+ Queue.Dispose();
+ Stubs.Dispose();
+ FunctionTable.Dispose();
+ CountTable.Dispose();
+
+ _ptc.Close();
+ _ptc.Profiler.Stop();
+
+ _ptc.Dispose();
+ _ptc.Profiler.Dispose();
+ }
+ }
+
+ private ulong ExecuteSingle(State.ExecutionContext context, ulong address)
+ {
+ TranslatedFunction func = GetOrTranslate(address, context.ExecutionMode);
+
+ Statistics.StartTimer();
+
+ ulong nextAddr = func.Execute(Stubs.ContextWrapper, context);
+
+ Statistics.StopTimer(address);
+
+ return nextAddr;
+ }
+
+ public ulong Step(State.ExecutionContext context, ulong address)
+ {
+ TranslatedFunction func = Translate(address, context.ExecutionMode, highCq: false, singleStep: true);
+
+ address = func.Execute(Stubs.ContextWrapper, context);
+
+ EnqueueForDeletion(address, func);
+
+ return address;
+ }
+
+ internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
+ {
+ if (!Functions.TryGetValue(address, out TranslatedFunction func))
+ {
+ func = Translate(address, mode, highCq: false);
+
+ TranslatedFunction oldFunc = Functions.GetOrAdd(address, func.GuestSize, func);
+
+ if (oldFunc != func)
+ {
+ JitCache.Unmap(func.FuncPointer);
+ func = oldFunc;
+ }
+
+ if (_ptc.Profiler.Enabled)
+ {
+ _ptc.Profiler.AddEntry(address, mode, highCq: false);
+ }
+
+ RegisterFunction(address, func);
+ }
+
+ return func;
+ }
+
+ internal void RegisterFunction(ulong guestAddress, TranslatedFunction func)
+ {
+ if (FunctionTable.IsValid(guestAddress) && (Optimizations.AllowLcqInFunctionTable || func.HighCq))
+ {
+ Volatile.Write(ref FunctionTable.GetValue(guestAddress), (ulong)func.FuncPointer);
+ }
+ }
+
+ internal TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq, bool singleStep = false)
+ {
+ var context = new ArmEmitterContext(
+ Memory,
+ CountTable,
+ FunctionTable,
+ Stubs,
+ address,
+ highCq,
+ _ptc.State != PtcState.Disabled,
+ mode: Aarch32Mode.User);
+
+ Logger.StartPass(PassName.Decoding);
+
+ Block[] blocks = Decoder.Decode(Memory, address, mode, highCq, singleStep ? DecoderMode.SingleInstruction : DecoderMode.MultipleBlocks);
+
+ Logger.EndPass(PassName.Decoding);
+
+ Logger.StartPass(PassName.Translation);
+
+ EmitSynchronization(context);
+
+ if (blocks[0].Address != address)
+ {
+ context.Branch(context.GetLabel(address));
+ }
+
+ ControlFlowGraph cfg = EmitAndGetCFG(context, blocks, out Range funcRange, out Counter<uint> counter);
+
+ ulong funcSize = funcRange.End - funcRange.Start;
+
+ Logger.EndPass(PassName.Translation, cfg);
+
+ Logger.StartPass(PassName.RegisterUsage);
+
+ RegisterUsage.RunPass(cfg, mode);
+
+ Logger.EndPass(PassName.RegisterUsage);
+
+ var retType = OperandType.I64;
+ var argTypes = new OperandType[] { OperandType.I64 };
+
+ var options = highCq ? CompilerOptions.HighCq : CompilerOptions.None;
+
+ if (context.HasPtc && !singleStep)
+ {
+ options |= CompilerOptions.Relocatable;
+ }
+
+ CompiledFunction compiledFunc = Compiler.Compile(cfg, argTypes, retType, options, RuntimeInformation.ProcessArchitecture);
+
+ if (context.HasPtc && !singleStep)
+ {
+ Hash128 hash = Ptc.ComputeHash(Memory, address, funcSize);
+
+ _ptc.WriteCompiledFunction(address, funcSize, hash, highCq, compiledFunc);
+ }
+
+ GuestFunction func = compiledFunc.MapWithPointer<GuestFunction>(out IntPtr funcPointer);
+
+ Allocators.ResetAll();
+
+ return new TranslatedFunction(func, funcPointer, counter, funcSize, highCq);
+ }
+
+ private void BackgroundTranslate()
+ {
+ while (_threadCount != 0 && Queue.TryDequeue(out RejitRequest request))
+ {
+ TranslatedFunction func = Translate(request.Address, request.Mode, highCq: true);
+
+ Functions.AddOrUpdate(request.Address, func.GuestSize, func, (key, oldFunc) =>
+ {
+ EnqueueForDeletion(key, oldFunc);
+ return func;
+ });
+
+ if (_ptc.Profiler.Enabled)
+ {
+ _ptc.Profiler.UpdateEntry(request.Address, request.Mode, highCq: true);
+ }
+
+ RegisterFunction(request.Address, func);
+ }
+ }
+
+ private readonly struct Range
+ {
+ public ulong Start { get; }
+ public ulong End { get; }
+
+ public Range(ulong start, ulong end)
+ {
+ Start = start;
+ End = end;
+ }
+ }
+
+ private static ControlFlowGraph EmitAndGetCFG(
+ ArmEmitterContext context,
+ Block[] blocks,
+ out Range range,
+ out Counter<uint> counter)
+ {
+ counter = null;
+
+ ulong rangeStart = ulong.MaxValue;
+ ulong rangeEnd = 0;
+
+ for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+ {
+ Block block = blocks[blkIndex];
+
+ if (!block.Exit)
+ {
+ if (rangeStart > block.Address)
+ {
+ rangeStart = block.Address;
+ }
+
+ if (rangeEnd < block.EndAddress)
+ {
+ rangeEnd = block.EndAddress;
+ }
+ }
+
+ if (block.Address == context.EntryAddress)
+ {
+ if (!context.HighCq)
+ {
+ EmitRejitCheck(context, out counter);
+ }
+
+ context.ClearQcFlag();
+ }
+
+ context.CurrBlock = block;
+
+ context.MarkLabel(context.GetLabel(block.Address));
+
+ if (block.Exit)
+ {
+ // Left option here as it may be useful if we need to return to managed rather than tail call in
+ // future. (eg. for debug)
+ bool useReturns = false;
+
+ InstEmitFlowHelper.EmitVirtualJump(context, Const(block.Address), isReturn: useReturns);
+ }
+ else
+ {
+ for (int opcIndex = 0; opcIndex < block.OpCodes.Count; opcIndex++)
+ {
+ OpCode opCode = block.OpCodes[opcIndex];
+
+ context.CurrOp = opCode;
+
+ bool isLastOp = opcIndex == block.OpCodes.Count - 1;
+
+ if (isLastOp)
+ {
+ context.SyncQcFlag();
+
+ if (block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
+ {
+ EmitSynchronization(context);
+ }
+ }
+
+ Operand lblPredicateSkip = default;
+
+ if (context.IsInIfThenBlock && context.CurrentIfThenBlockCond != Condition.Al)
+ {
+ lblPredicateSkip = Label();
+
+ InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, context.CurrentIfThenBlockCond.Invert());
+ }
+
+ if (opCode is OpCode32 op && op.Cond < Condition.Al)
+ {
+ lblPredicateSkip = Label();
+
+ InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, op.Cond.Invert());
+ }
+
+ if (opCode.Instruction.Emitter != null)
+ {
+ opCode.Instruction.Emitter(context);
+ }
+ else
+ {
+ throw new InvalidOperationException($"Invalid instruction \"{opCode.Instruction.Name}\".");
+ }
+
+ if (lblPredicateSkip != default)
+ {
+ context.MarkLabel(lblPredicateSkip);
+ }
+
+ if (context.IsInIfThenBlock && opCode.Instruction.Name != InstName.It)
+ {
+ context.AdvanceIfThenBlockState();
+ }
+ }
+ }
+ }
+
+ range = new Range(rangeStart, rangeEnd);
+
+ return context.GetControlFlowGraph();
+ }
+
+ internal static void EmitRejitCheck(ArmEmitterContext context, out Counter<uint> counter)
+ {
+ const int MinsCallForRejit = 100;
+
+ counter = new Counter<uint>(context.CountTable);
+
+ Operand lblEnd = Label();
+
+ Operand address = !context.HasPtc ?
+ Const(ref counter.Value) :
+ Const(ref counter.Value, Ptc.CountTableSymbol);
+
+ Operand curCount = context.Load(OperandType.I32, address);
+ Operand count = context.Add(curCount, Const(1));
+ context.Store(address, count);
+ context.BranchIf(lblEnd, curCount, Const(MinsCallForRejit), Comparison.NotEqual, BasicBlockFrequency.Cold);
+
+ context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.EnqueueForRejit)), Const(context.EntryAddress));
+
+ context.MarkLabel(lblEnd);
+ }
+
+ internal static void EmitSynchronization(EmitterContext context)
+ {
+ long countOffs = NativeContext.GetCounterOffset();
+
+ Operand lblNonZero = Label();
+ Operand lblExit = Label();
+
+ Operand countAddr = context.Add(context.LoadArgument(OperandType.I64, 0), Const(countOffs));
+ Operand count = context.Load(OperandType.I32, countAddr);
+ context.BranchIfTrue(lblNonZero, count, BasicBlockFrequency.Cold);
+
+ Operand running = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.CheckSynchronization)));
+ context.BranchIfTrue(lblExit, running, BasicBlockFrequency.Cold);
+
+ context.Return(Const(0L));
+
+ context.MarkLabel(lblNonZero);
+ count = context.Subtract(count, Const(1));
+ context.Store(countAddr, count);
+
+ context.MarkLabel(lblExit);
+ }
+
+ public void InvalidateJitCacheRegion(ulong address, ulong size)
+ {
+ ulong[] overlapAddresses = Array.Empty<ulong>();
+
+ int overlapsCount = Functions.GetOverlaps(address, size, ref overlapAddresses);
+
+ if (overlapsCount != 0)
+ {
+ // If rejit is running, stop it as it may be trying to rejit a function on the invalidated region.
+ ClearRejitQueue(allowRequeue: true);
+ }
+
+ for (int index = 0; index < overlapsCount; index++)
+ {
+ ulong overlapAddress = overlapAddresses[index];
+
+ if (Functions.TryGetValue(overlapAddress, out TranslatedFunction overlap))
+ {
+ Functions.Remove(overlapAddress);
+ Volatile.Write(ref FunctionTable.GetValue(overlapAddress), FunctionTable.Fill);
+ EnqueueForDeletion(overlapAddress, overlap);
+ }
+ }
+
+ // TODO: Remove overlapping functions from the JitCache aswell.
+ // This should be done safely, with a mechanism to ensure the function is not being executed.
+ }
+
+ internal void EnqueueForRejit(ulong guestAddress, ExecutionMode mode)
+ {
+ Queue.Enqueue(guestAddress, mode);
+ }
+
+ private void EnqueueForDeletion(ulong guestAddress, TranslatedFunction func)
+ {
+ _oldFuncs.Enqueue(new(guestAddress, func));
+ }
+
+ private void ClearJitCache()
+ {
+ // Ensure no attempt will be made to compile new functions due to rejit.
+ ClearRejitQueue(allowRequeue: false);
+
+ List<TranslatedFunction> functions = Functions.AsList();
+
+ foreach (var func in functions)
+ {
+ JitCache.Unmap(func.FuncPointer);
+
+ func.CallCounter?.Dispose();
+ }
+
+ Functions.Clear();
+
+ while (_oldFuncs.TryDequeue(out var kv))
+ {
+ JitCache.Unmap(kv.Value.FuncPointer);
+
+ kv.Value.CallCounter?.Dispose();
+ }
+ }
+
+ private void ClearRejitQueue(bool allowRequeue)
+ {
+ if (!allowRequeue)
+ {
+ Queue.Clear();
+
+ return;
+ }
+
+ lock (Queue.Sync)
+ {
+ while (Queue.Count > 0 && Queue.TryDequeue(out RejitRequest request))
+ {
+ if (Functions.TryGetValue(request.Address, out var func) && func.CallCounter != null)
+ {
+ Volatile.Write(ref func.CallCounter.Value, 0);
+ }
+ }
+ }
+ }
+ }
+}