diff options
| author | FICTURE7 <FICTURE7@gmail.com> | 2021-08-17 22:08:34 +0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-08-17 15:08:34 -0300 |
| commit | 22b2cb39af00fb8881e908fd671fbf57a6e2db2a (patch) | |
| tree | a79e2df801d7f16a33ff50b3c5bfed303cb476e9 /ARMeilleure/Translation | |
| parent | cd4530f29c6a4ffd1b023105350b0440fa63f47b (diff) | |
Reduce JIT GC allocations (#2515)
* Turn `MemoryOperand` into a struct
* Remove `IntrinsicOperation`
* Remove `PhiNode`
* Remove `Node`
* Turn `Operand` into a struct
* Turn `Operation` into a struct
* Clean up pool management methods
* Add `Arena` allocator
* Move `OperationHelper` to `Operation.Factory`
* Move `OperandHelper` to `Operand.Factory`
* Optimize `Operation` a bit
* Fix `Arena` initialization
* Rename `NativeList<T>` to `ArenaList<T>`
* Reduce `Operand` size from 88 to 56 bytes
* Reduce `Operation` size from 56 to 40 bytes
* Add optimistic interning of Register & Constant operands
* Optimize `RegisterUsage` pass a bit
* Optimize `RemoveUnusedNodes` pass a bit
Iterating in reverse-order allows killing dependency chains in a single
pass.
* Fix PPTC symbols
* Optimize `BasicBlock` a bit
Reduce allocations from `_successor` & `DominanceFrontiers`
* Fix `Operation` resize
* Make `Arena` expandable
Change the arena allocator to be expandable by allocating in pages, with
some of them being pooled. Currently 32 pages are pooled. An LRU removal
mechanism should probably be added to it.
Apparently MHR can allocate bitmaps large enough to exceed the 16MB
limit for the type.
* Move `Arena` & `ArenaList` to `Common`
* Remove `ThreadStaticPool` & co
* Add `PhiOperation`
* Reduce `Operand` size from 56 to 48 bytes
* Add linear-probing to `Operand` intern table
* Optimize `HybridAllocator` a bit
* Add `Allocators` class
* Tune `ArenaAllocator` sizes
* Add page removal mechanism to `ArenaAllocator`
Remove pages which have not been used for more than 5s after each reset.
I am on the fence about whether this would be better using a Gen2 callback object like
the one in System.Buffers.ArrayPool<T>, to trim the pool. Because right
now if a large translation happens, the pages will be freed only after a
reset. This reset may not happen for a while because no new translation
is hit, but the arena base sizes are rather small.
* Fix `OOM` when allocating larger than page size in `ArenaAllocator`
Tweak resizing mechanism for Operand.Uses and Assignments.
* Optimize `Optimizer` a bit
* Optimize `Operand.Add<T>/Remove<T>` a bit
* Clean up `PreAllocator`
* Fix phi insertion order
Reduce codegen diffs.
* Fix code alignment
* Use new heuristics for degree of parallelism
* Suppress warnings
* Address gdkchan's feedback
Renamed `GetValue()` to `GetValueUnsafe()` to make it more clear that
`Operand.Value` should usually not be modified directly.
* Add fast path to `ArenaAllocator`
* Assembly for `ArenaAllocator.Allocate(ulong)`:
.L0:
mov rax, [rcx+0x18]
lea r8, [rax+rdx]
cmp r8, [rcx+0x10]
ja short .L2
.L1:
mov rdx, [rcx+8]
add rax, [rdx+8]
mov [rcx+0x18], r8
ret
.L2:
jmp ArenaAllocator.AllocateSlow(UInt64)
A few variables/fields had to be changed to ulong so that RyuJIT avoids
emitting zero-extends.
* Implement a new heuristic to free pooled pages.
If an arena is used often, it is more likely that its pages will be
needed, so the pages are kept for longer (e.g: during PPTC rebuild or
bursts of compilations). If it is not used often, then it is more likely
that its pages will not be needed (e.g: after PPTC rebuild or bursts
of compilations).
* Address riperiperi's feedback
* Use `EqualityComparer<T>` in `IntrusiveList<T>`
Avoids a potential GC hole in `Equals(T, T)`.
Diffstat (limited to 'ARMeilleure/Translation')
| -rw-r--r-- | ARMeilleure/Translation/ArmEmitterContext.cs | 6 | ||||
| -rw-r--r-- | ARMeilleure/Translation/Compiler.cs | 2 | ||||
| -rw-r--r-- | ARMeilleure/Translation/ControlFlowGraph.cs | 19 | ||||
| -rw-r--r-- | ARMeilleure/Translation/EmitterContext.cs | 44 | ||||
| -rw-r--r-- | ARMeilleure/Translation/PTC/DegreeOfParallelism.cs | 50 | ||||
| -rw-r--r-- | ARMeilleure/Translation/PTC/Ptc.cs | 23 | ||||
| -rw-r--r-- | ARMeilleure/Translation/RegisterToLocal.cs | 6 | ||||
| -rw-r--r-- | ARMeilleure/Translation/RegisterUsage.cs | 220 | ||||
| -rw-r--r-- | ARMeilleure/Translation/SsaConstruction.cs | 110 | ||||
| -rw-r--r-- | ARMeilleure/Translation/SsaDeconstruction.cs | 28 | ||||
| -rw-r--r-- | ARMeilleure/Translation/Translator.cs | 43 | ||||
| -rw-r--r-- | ARMeilleure/Translation/TranslatorStubs.cs | 4 |
12 files changed, 241 insertions, 314 deletions
diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs index 7a82b27b..563775ee 100644 --- a/ARMeilleure/Translation/ArmEmitterContext.cs +++ b/ARMeilleure/Translation/ArmEmitterContext.cs @@ -9,7 +9,7 @@ using ARMeilleure.Translation.PTC; using System; using System.Collections.Generic; using System.Reflection; -using static ARMeilleure.IntermediateRepresentation.OperandHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; namespace ARMeilleure.Translation { @@ -138,7 +138,7 @@ namespace ARMeilleure.Translation { if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet) { - return null; + return default; } Operand n = _optCmpTempN; @@ -193,7 +193,7 @@ namespace ARMeilleure.Translation } } - return null; + return default; } } }
\ No newline at end of file diff --git a/ARMeilleure/Translation/Compiler.cs b/ARMeilleure/Translation/Compiler.cs index af718e21..812144a1 100644 --- a/ARMeilleure/Translation/Compiler.cs +++ b/ARMeilleure/Translation/Compiler.cs @@ -55,7 +55,7 @@ namespace ARMeilleure.Translation Logger.EndPass(PassName.SsaConstruction, cfg); - CompilerContext cctx = new CompilerContext(cfg, argTypes, retType, options); + CompilerContext cctx = new(cfg, argTypes, retType, options); return CodeGenerator.Generate(cctx, ptcInfo); } diff --git a/ARMeilleure/Translation/ControlFlowGraph.cs b/ARMeilleure/Translation/ControlFlowGraph.cs index 4c76d5dd..3e7ff0c9 100644 --- a/ARMeilleure/Translation/ControlFlowGraph.cs +++ b/ARMeilleure/Translation/ControlFlowGraph.cs @@ -22,15 +22,12 @@ namespace ARMeilleure.Translation Blocks = blocks; LocalsCount = localsCount; - Update(removeUnreachableBlocks: true); + Update(); } - public void Update(bool removeUnreachableBlocks) + public void Update() { - if (removeUnreachableBlocks) - { - RemoveUnreachableBlocks(Blocks); - } + RemoveUnreachableBlocks(Blocks); var visited = new HashSet<BasicBlock>(); var blockStack = new Stack<BasicBlock>(); @@ -47,7 +44,7 @@ namespace ARMeilleure.Translation { bool visitedNew = false; - for (int i = 0; i < block.SuccessorCount; i++) + for (int i = 0; i < block.SuccessorsCount; i++) { BasicBlock succ = block.GetSuccessor(i); @@ -83,7 +80,7 @@ namespace ARMeilleure.Translation { Debug.Assert(block.Index != -1, "Invalid block index."); - for (int i = 0; i < block.SuccessorCount; i++) + for (int i = 0; i < block.SuccessorsCount; i++) { BasicBlock succ = block.GetSuccessor(i); @@ -105,9 +102,9 @@ namespace ARMeilleure.Translation if (!visited.Contains(block)) { - while (block.SuccessorCount > 0) + while (block.SuccessorsCount > 0) { - block.RemoveSuccessor(index: block.SuccessorCount - 1); + block.RemoveSuccessor(index: block.SuccessorsCount - 1); } blocks.Remove(block); @@ -126,7 +123,7 @@ namespace 
ARMeilleure.Translation { BasicBlock splitBlock = new BasicBlock(Blocks.Count); - for (int i = 0; i < predecessor.SuccessorCount; i++) + for (int i = 0; i < predecessor.SuccessorsCount; i++) { if (predecessor.GetSuccessor(i) == successor) { diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs index fbd9e691..7525a5d4 100644 --- a/ARMeilleure/Translation/EmitterContext.cs +++ b/ARMeilleure/Translation/EmitterContext.cs @@ -1,12 +1,10 @@ using ARMeilleure.Diagnostics; using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; -using ARMeilleure.Translation.PTC; using System; using System.Collections.Generic; using System.Reflection; - -using static ARMeilleure.IntermediateRepresentation.OperandHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; namespace ARMeilleure.Translation { @@ -77,7 +75,7 @@ namespace ARMeilleure.Translation public void BranchIf(Operand label, Operand op1, Operand op2, Comparison comp, BasicBlockFrequency falseFreq = default) { - Add(Instruction.BranchIf, null, op1, op2, Const((int)comp)); + Add(Instruction.BranchIf, default, op1, op2, Const((int)comp)); BranchToLabel(label, uncond: false, falseFreq); } @@ -157,7 +155,7 @@ namespace ARMeilleure.Translation } else { - return Add(Instruction.Call, null, args); + return Add(Instruction.Call, default, args); } } @@ -169,7 +167,7 @@ namespace ARMeilleure.Translation Array.Copy(callArgs, 0, args, 1, callArgs.Length); - Add(Instruction.Tailcall, null, args); + Add(Instruction.Tailcall, default, args); _needsNewBlock = true; } @@ -356,7 +354,7 @@ namespace ARMeilleure.Translation public void Return(Operand op1) { - Add(Instruction.Return, null, op1); + Add(Instruction.Return, default, op1); _needsNewBlock = true; } @@ -398,17 +396,17 @@ namespace ARMeilleure.Translation public void Store(Operand address, Operand value) { - Add(Instruction.Store, null, address, value); + Add(Instruction.Store, default, address, value); } 
public void Store16(Operand address, Operand value) { - Add(Instruction.Store16, null, address, value); + Add(Instruction.Store16, default, address, value); } public void Store8(Operand address, Operand value) { - Add(Instruction.Store8, null, address, value); + Add(Instruction.Store8, default, address, value); } public void StoreToContext() @@ -501,11 +499,11 @@ namespace ARMeilleure.Translation } } - private Operand Add(Instruction inst, Operand dest = null) + private Operand Add(Instruction inst, Operand dest = default) { NewNextBlockIfNeeded(); - Operation operation = OperationHelper.Operation(inst, dest); + Operation operation = Operation.Factory.Operation(inst, dest); _irBlock.Operations.AddLast(operation); @@ -516,7 +514,7 @@ namespace ARMeilleure.Translation { NewNextBlockIfNeeded(); - Operation operation = OperationHelper.Operation(inst, dest, sources); + Operation operation = Operation.Factory.Operation(inst, dest, sources); _irBlock.Operations.AddLast(operation); @@ -527,7 +525,7 @@ namespace ARMeilleure.Translation { NewNextBlockIfNeeded(); - Operation operation = OperationHelper.Operation(inst, dest, source0); + Operation operation = Operation.Factory.Operation(inst, dest, source0); _irBlock.Operations.AddLast(operation); @@ -538,7 +536,7 @@ namespace ARMeilleure.Translation { NewNextBlockIfNeeded(); - Operation operation = OperationHelper.Operation(inst, dest, source0, source1); + Operation operation = Operation.Factory.Operation(inst, dest, source0, source1); _irBlock.Operations.AddLast(operation); @@ -549,7 +547,7 @@ namespace ARMeilleure.Translation { NewNextBlockIfNeeded(); - Operation operation = OperationHelper.Operation(inst, dest, source0, source1, source2); + Operation operation = Operation.Factory.Operation(inst, dest, source0, source1, source2); _irBlock.Operations.AddLast(operation); @@ -573,14 +571,14 @@ namespace ARMeilleure.Translation public void AddIntrinsicNoRet(Intrinsic intrin, params Operand[] args) { - Add(intrin, null, args); + 
Add(intrin, default, args); } private Operand Add(Intrinsic intrin, Operand dest, params Operand[] sources) { NewNextBlockIfNeeded(); - IntrinsicOperation operation = new IntrinsicOperation(intrin, dest, sources); + Operation operation = Operation.Factory.Operation(intrin, dest, sources); _irBlock.Operations.AddLast(operation); @@ -641,7 +639,7 @@ namespace ARMeilleure.Translation private void NextBlock(BasicBlock nextBlock) { - if (_irBlock?.SuccessorCount == 0 && !EndsWithUnconditional(_irBlock)) + if (_irBlock?.SuccessorsCount == 0 && !EndsWithUnconditional(_irBlock)) { _irBlock.AddSuccessor(nextBlock); @@ -662,9 +660,11 @@ namespace ARMeilleure.Translation private static bool EndsWithUnconditional(BasicBlock block) { - return block.Operations.Last is Operation lastOp && - (lastOp.Instruction == Instruction.Return || - lastOp.Instruction == Instruction.Tailcall); + Operation last = block.Operations.Last; + + return last != default && + (last.Instruction == Instruction.Return || + last.Instruction == Instruction.Tailcall); } public ControlFlowGraph GetControlFlowGraph() diff --git a/ARMeilleure/Translation/PTC/DegreeOfParallelism.cs b/ARMeilleure/Translation/PTC/DegreeOfParallelism.cs deleted file mode 100644 index e4752c5e..00000000 --- a/ARMeilleure/Translation/PTC/DegreeOfParallelism.cs +++ /dev/null @@ -1,50 +0,0 @@ -using System; - -namespace ARMeilleure.Translation.PTC -{ - class DegreeOfParallelism - { - public double GiBRef { get; } // GiB. - public double WeightRef { get; } // %. - public double IncrementByGiB { get; } // %. 
- private double _coefficient; - - public DegreeOfParallelism(double gibRef, double weightRef, double incrementByGiB) - { - GiBRef = gibRef; - WeightRef = weightRef; - IncrementByGiB = incrementByGiB; - - _coefficient = weightRef - (incrementByGiB * gibRef); - } - - public int GetDegreeOfParallelism(int min, int max) - { - double degreeOfParallelism = (GetProcessorCount() * GetWeight(GetAvailableMemoryGiB())) / 100d; - - return Math.Clamp((int)Math.Round(degreeOfParallelism), min, max); - } - - public static double GetProcessorCount() - { - return (double)Environment.ProcessorCount; - } - - public double GetWeight(double gib) - { - return (IncrementByGiB * gib) + _coefficient; - } - - public static double GetAvailableMemoryGiB() - { - GCMemoryInfo gcMemoryInfo = GC.GetGCMemoryInfo(); - - return FromBytesToGiB(gcMemoryInfo.TotalAvailableMemoryBytes - gcMemoryInfo.MemoryLoadBytes); - } - - private static double FromBytesToGiB(long bytes) - { - return Math.ScaleB((double)bytes, -30); - } - } -}
\ No newline at end of file diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 9f07ca01..1ed54945 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -9,7 +9,6 @@ using Ryujinx.Common.Configuration; using Ryujinx.Common.Logging; using System; using System.Buffers.Binary; -using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics; using System.IO; @@ -28,7 +27,7 @@ namespace ARMeilleure.Translation.PTC private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0"; - private const uint InternalVersion = 2228; //! To be incremented manually for each change to the ARMeilleure project. + private const uint InternalVersion = 2515; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1"; @@ -776,9 +775,7 @@ namespace ARMeilleure.Translation.PTC _translateCount = 0; _translateTotalCount = profiledFuncsToTranslate.Count; - int degreeOfParallelism = new DegreeOfParallelism(4d, 75d, 12.5d).GetDegreeOfParallelism(0, 32); - - if (_translateTotalCount == 0 || degreeOfParallelism == 0) + if (_translateTotalCount == 0) { ResetCarriersIfNeeded(); @@ -787,6 +784,14 @@ namespace ARMeilleure.Translation.PTC return; } + int degreeOfParallelism = Environment.ProcessorCount; + + // If there are enough cores lying around, we leave one alone for other tasks. 
+ if (degreeOfParallelism > 4) + { + degreeOfParallelism--; + } + Logger.Info?.Print(LogClass.Ptc, $"{_translateCount} of {_translateTotalCount} functions translated | Thread count: {degreeOfParallelism}"); PtcStateChanged?.Invoke(PtcLoadingState.Start, _translateCount, _translateTotalCount); @@ -825,8 +830,6 @@ namespace ARMeilleure.Translation.PTC break; } } - - Translator.DisposePools(); } List<Thread> threads = new List<Thread>(); @@ -839,6 +842,8 @@ namespace ARMeilleure.Translation.PTC threads.Add(thread); } + Stopwatch sw = Stopwatch.StartNew(); + threads.ForEach((thread) => thread.Start()); threads.ForEach((thread) => thread.Join()); @@ -847,9 +852,11 @@ namespace ARMeilleure.Translation.PTC progressReportEvent.Set(); progressReportThread.Join(); + sw.Stop(); + PtcStateChanged?.Invoke(PtcLoadingState.Loaded, _translateCount, _translateTotalCount); - Logger.Info?.Print(LogClass.Ptc, $"{_translateCount} of {_translateTotalCount} functions translated | Thread count: {degreeOfParallelism}"); + Logger.Info?.Print(LogClass.Ptc, $"{_translateCount} of {_translateTotalCount} functions translated | Thread count: {degreeOfParallelism} in {sw.Elapsed.TotalSeconds} s"); Thread preSaveThread = new Thread(PreSave); preSaveThread.IsBackground = true; diff --git a/ARMeilleure/Translation/RegisterToLocal.cs b/ARMeilleure/Translation/RegisterToLocal.cs index 088cec7e..abb9b373 100644 --- a/ARMeilleure/Translation/RegisterToLocal.cs +++ b/ARMeilleure/Translation/RegisterToLocal.cs @@ -1,7 +1,7 @@ using ARMeilleure.IntermediateRepresentation; using System.Collections.Generic; -using static ARMeilleure.IntermediateRepresentation.OperandHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; namespace ARMeilleure.Translation { @@ -27,11 +27,11 @@ namespace ARMeilleure.Translation for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) { - for (Node node = block.Operations.First; node != null; node = node.ListNext) + for (Operation 
node = block.Operations.First; node != default; node = node.ListNext) { Operand dest = node.Destination; - if (dest != null && dest.Kind == OperandKind.Register) + if (dest != default && dest.Kind == OperandKind.Register) { node.Destination = GetLocal(dest); } diff --git a/ARMeilleure/Translation/RegisterUsage.cs b/ARMeilleure/Translation/RegisterUsage.cs index 1a97515f..035d4540 100644 --- a/ARMeilleure/Translation/RegisterUsage.cs +++ b/ARMeilleure/Translation/RegisterUsage.cs @@ -1,9 +1,11 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; using System; - -using static ARMeilleure.IntermediateRepresentation.OperandHelper; -using static ARMeilleure.IntermediateRepresentation.OperationHelper; +using System.Numerics; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; namespace ARMeilleure.Translation { @@ -14,27 +16,48 @@ namespace ARMeilleure.Translation private struct RegisterMask : IEquatable<RegisterMask> { - public long IntMask { get; set; } - public long VecMask { get; set; } + public long IntMask => Mask.GetElement(0); + public long VecMask => Mask.GetElement(1); + + public Vector128<long> Mask { get; } + + public RegisterMask(Vector128<long> mask) + { + Mask = mask; + } public RegisterMask(long intMask, long vecMask) { - IntMask = intMask; - VecMask = vecMask; + Mask = Vector128.Create(intMask, vecMask); } public static RegisterMask operator &(RegisterMask x, RegisterMask y) { + if (Sse2.IsSupported) + { + return new RegisterMask(Sse2.And(x.Mask, y.Mask)); + } + return new RegisterMask(x.IntMask & y.IntMask, x.VecMask & y.VecMask); } public static RegisterMask operator |(RegisterMask x, RegisterMask y) { + if (Sse2.IsSupported) + { + return new RegisterMask(Sse2.Or(x.Mask, y.Mask)); + } + return new RegisterMask(x.IntMask | y.IntMask, x.VecMask | y.VecMask); } public static 
RegisterMask operator ~(RegisterMask x) { + if (Sse2.IsSupported) + { + return new RegisterMask(Sse2.AndNot(x.Mask, Vector128<long>.AllBitsSet)); + } + return new RegisterMask(~x.IntMask, ~x.VecMask); } @@ -55,12 +78,12 @@ namespace ARMeilleure.Translation public bool Equals(RegisterMask other) { - return IntMask == other.IntMask && VecMask == other.VecMask; + return Mask.Equals(other.Mask); } public override int GetHashCode() { - return HashCode.Combine(IntMask, VecMask); + return Mask.GetHashCode(); } } @@ -72,27 +95,23 @@ namespace ARMeilleure.Translation for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) { - for (Node node = block.Operations.First; node != null; node = node.ListNext) + for (Operation node = block.Operations.First; node != default; node = node.ListNext) { - Operation operation = node as Operation; - - for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++) + for (int index = 0; index < node.SourcesCount; index++) { - Operand source = operation.GetSource(srcIndex); + Operand source = node.GetSource(index); - if (source.Kind != OperandKind.Register) + if (source.Kind == OperandKind.Register) { - continue; - } + Register register = source.GetRegister(); - Register register = source.GetRegister(); - - localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index]; + localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index]; + } } - if (operation.Destination != null && operation.Destination.Kind == OperandKind.Register) + if (node.Destination != default && node.Destination.Kind == OperandKind.Register) { - localOutputs[block.Index] |= GetMask(operation.Destination.GetRegister()); + localOutputs[block.Index] |= GetMask(node.Destination.GetRegister()); } } } @@ -104,7 +123,6 @@ namespace ARMeilleure.Translation RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Count]; bool modified; - bool firstPass = true; do @@ -121,7 +139,6 @@ namespace ARMeilleure.Translation 
BasicBlock predecessor = block.Predecessors[0]; RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index]; - RegisterMask outputs = globalOutputs[predecessor.Index]; for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++) @@ -129,7 +146,6 @@ namespace ARMeilleure.Translation predecessor = block.Predecessors[pIndex]; cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index]; - outputs |= globalOutputs[predecessor.Index]; } @@ -140,21 +156,13 @@ namespace ARMeilleure.Translation cmnOutputs &= globalCmnOutputs[block.Index]; } - if (Exchange(globalCmnOutputs, block.Index, cmnOutputs)) - { - modified = true; - } - + modified |= Exchange(globalCmnOutputs, block.Index, cmnOutputs); outputs |= localOutputs[block.Index]; - - if (Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs)) - { - modified = true; - } + modified |= Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs); } - else if (Exchange(globalOutputs, block.Index, localOutputs[block.Index])) + else { - modified = true; + modified |= Exchange(globalOutputs, block.Index, localOutputs[block.Index]); } } @@ -165,17 +173,14 @@ namespace ARMeilleure.Translation RegisterMask inputs = localInputs[block.Index]; - for (int i = 0; i < block.SuccessorCount; i++) + for (int i = 0; i < block.SuccessorsCount; i++) { inputs |= globalInputs[block.GetSuccessor(i).Index]; } inputs &= ~globalCmnOutputs[block.Index]; - if (Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs)) - { - modified = true; - } + modified |= Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs); } firstPass = false; @@ -192,12 +197,18 @@ namespace ARMeilleure.Translation block.Operations.Remove(block.Operations.First); } + Operand arg = default; + // The only block without any predecessor should be the entry block. // It always needs a context load as it is the first block to run. 
if (block.Predecessors.Count == 0 || hasContextLoad) { - LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector, mode); - LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer, mode); + arg = Local(OperandType.I64); + + Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0))); + + LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector, mode, loadArg, arg); + LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer, mode, loadArg, arg); } bool hasContextStore = HasContextStore(block); @@ -209,8 +220,15 @@ namespace ARMeilleure.Translation if (EndsWithReturn(block) || hasContextStore) { - StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, mode); - StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, mode); + if (arg == default) + { + arg = Local(OperandType.I64); + + block.Append(Operation(Instruction.LoadArgument, arg, Const(0))); + } + + StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, mode, arg); + StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, mode, arg); } } } @@ -222,27 +240,31 @@ namespace ARMeilleure.Translation private static bool HasContextStore(BasicBlock block) { - return EndsWith(block, Instruction.StoreToContext) && block.GetLastOp().SourcesCount == 0; + return EndsWith(block, Instruction.StoreToContext) && block.Operations.Last.SourcesCount == 0; } private static bool StartsWith(BasicBlock block, Instruction inst) { - if (block.Operations.Count == 0) + if (block.Operations.Count > 0) { - return false; + Operation first = block.Operations.First; + + return first != default && first.Instruction == inst; } - return block.Operations.First is Operation operation && operation.Instruction == inst; + return false; } private static bool EndsWith(BasicBlock block, Instruction inst) { - if (block.Operations.Count == 0) + if (block.Operations.Count 
> 0) { - return false; + Operation last = block.Operations.Last; + + return last != default && last.Instruction == inst; } - return block.Operations.Last is Operation operation && operation.Instruction == inst; + return false; } private static RegisterMask GetMask(Register register) @@ -263,76 +285,57 @@ namespace ARMeilleure.Translation private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value) { - RegisterMask oldValue = masks[blkIndex]; + ref RegisterMask curValue = ref masks[blkIndex]; + + bool changed = curValue != value; - masks[blkIndex] = value; + curValue = value; - return oldValue != value; + return changed; } - private static void LoadLocals(BasicBlock block, long inputs, RegisterType baseType, ExecutionMode mode) + private static void LoadLocals( + BasicBlock block, + long inputs, + RegisterType baseType, + ExecutionMode mode, + Operation loadArg, + Operand arg) { - Operand arg0 = Local(OperandType.I64); - - for (int bit = 63; bit >= 0; bit--) + while (inputs != 0) { - long mask = 1L << bit; - - if ((inputs & mask) == 0) - { - continue; - } + int bit = 63 - BitOperations.LeadingZeroCount((ulong)inputs); Operand dest = GetRegFromBit(bit, baseType, mode); - - long offset = NativeContext.GetRegisterOffset(dest.GetRegister()); - + Operand offset = Const((long)NativeContext.GetRegisterOffset(dest.GetRegister())); Operand addr = Local(OperandType.I64); - Operation loadOp = Operation(Instruction.Load, dest, addr); - - block.Operations.AddFirst(loadOp); + block.Operations.AddAfter(loadArg, Operation(Instruction.Load, dest, addr)); + block.Operations.AddAfter(loadArg, Operation(Instruction.Add, addr, arg, offset)); - Operation calcOffsOp = Operation(Instruction.Add, addr, arg0, Const(offset)); - - block.Operations.AddFirst(calcOffsOp); + inputs &= ~(1L << bit); } - - Operation loadArg0 = Operation(Instruction.LoadArgument, arg0, Const(0)); - - block.Operations.AddFirst(loadArg0); } - private static void StoreLocals(BasicBlock block, 
long outputs, RegisterType baseType, ExecutionMode mode) + private static void StoreLocals( + BasicBlock block, + long outputs, + RegisterType baseType, + ExecutionMode mode, + Operand arg) { - Operand arg0 = Local(OperandType.I64); - - Operation loadArg0 = Operation(Instruction.LoadArgument, arg0, Const(0)); - - block.Append(loadArg0); - - for (int bit = 0; bit < 64; bit++) + while (outputs != 0) { - long mask = 1L << bit; - - if ((outputs & mask) == 0) - { - continue; - } + int bit = BitOperations.TrailingZeroCount(outputs); Operand source = GetRegFromBit(bit, baseType, mode); - - long offset = NativeContext.GetRegisterOffset(source.GetRegister()); - + Operand offset = Const((long)NativeContext.GetRegisterOffset(source.GetRegister())); Operand addr = Local(OperandType.I64); - Operation calcOffsOp = Operation(Instruction.Add, addr, arg0, Const(offset)); - - block.Append(calcOffsOp); + block.Append(Operation(Instruction.Add, addr, arg, offset)); + block.Append(Operation(Instruction.Store, default, addr, source)); - Operation storeOp = Operation(Instruction.Store, null, addr, source); - - block.Append(storeOp); + outputs &= ~(1L << bit); } } @@ -340,15 +343,15 @@ namespace ARMeilleure.Translation { if (bit < RegsCount) { - return OperandHelper.Register(bit, baseType, GetOperandType(baseType, mode)); + return Register(bit, baseType, GetOperandType(baseType, mode)); } else if (baseType == RegisterType.Integer) { - return OperandHelper.Register(bit & RegsMask, RegisterType.Flag, OperandType.I32); + return Register(bit & RegsMask, RegisterType.Flag, OperandType.I32); } else if (baseType == RegisterType.Vector) { - return OperandHelper.Register(bit & RegsMask, RegisterType.FpFlag, OperandType.I32); + return Register(bit & RegsMask, RegisterType.FpFlag, OperandType.I32); } else { @@ -371,12 +374,9 @@ namespace ARMeilleure.Translation private static bool EndsWithReturn(BasicBlock block) { - if (!(block.GetLastOp() is Operation operation)) - { - return false; - } + 
Operation last = block.Operations.Last; - return operation.Instruction == Instruction.Return; + return last != default && last.Instruction == Instruction.Return; } } }
\ No newline at end of file diff --git a/ARMeilleure/Translation/SsaConstruction.cs b/ARMeilleure/Translation/SsaConstruction.cs index 76cb9a44..9ba7b8df 100644 --- a/ARMeilleure/Translation/SsaConstruction.cs +++ b/ARMeilleure/Translation/SsaConstruction.cs @@ -4,7 +4,7 @@ using ARMeilleure.State; using System; using System.Collections.Generic; using System.Diagnostics; -using static ARMeilleure.IntermediateRepresentation.OperandHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; namespace ARMeilleure.Translation { @@ -18,7 +18,7 @@ namespace ARMeilleure.Translation public DefMap() { _map = new Dictionary<int, Operand>(); - _phiMasks = new BitMap(RegisterConsts.TotalCount); + _phiMasks = new BitMap(Allocators.Default, RegisterConsts.TotalCount); } public bool TryAddOperand(int key, Operand operand) @@ -57,26 +57,26 @@ namespace ARMeilleure.Translation // First pass, get all defs and locals uses. for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) { - for (Node node = block.Operations.First; node != null; node = node.ListNext) + for (Operation node = block.Operations.First; node != default; node = node.ListNext) { - if (node is not Operation operation) + for (int index = 0; index < node.SourcesCount; index++) { - continue; - } - - for (int index = 0; index < operation.SourcesCount; index++) - { - Operand src = operation.GetSource(index); + Operand src = node.GetSource(index); if (TryGetId(src, out int srcKey)) { - Operand local = localDefs[srcKey] ?? 
src; + Operand local = localDefs[srcKey]; - operation.SetSource(index, local); + if (local == default) + { + local = src; + } + + node.SetSource(index, local); } } - Operand dest = operation.Destination; + Operand dest = node.Destination; if (TryGetId(dest, out int destKey)) { @@ -84,7 +84,7 @@ namespace ARMeilleure.Translation localDefs[destKey] = local; - operation.Destination = local; + node.Destination = local; } } @@ -92,7 +92,7 @@ namespace ARMeilleure.Translation { Operand local = localDefs[key]; - if (local is null) + if (local == default) { continue; } @@ -119,28 +119,23 @@ namespace ARMeilleure.Translation // Second pass, rename variables with definitions on different blocks. for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) { - for (Node node = block.Operations.First; node != null; node = node.ListNext) + for (Operation node = block.Operations.First; node != default; node = node.ListNext) { - if (node is not Operation operation) - { - continue; - } - - for (int index = 0; index < operation.SourcesCount; index++) + for (int index = 0; index < node.SourcesCount; index++) { - Operand src = operation.GetSource(index); + Operand src = node.GetSource(index); if (TryGetId(src, out int key)) { Operand local = localDefs[key]; - if (local is null) + if (local == default) { local = FindDef(globalDefs, block, src); localDefs[key] = local; } - operation.SetSource(index, local); + node.SetSource(index, local); } } } @@ -200,12 +195,14 @@ namespace ARMeilleure.Translation // then use the definition from that Phi. 
Operand local = Local(operand.Type); - PhiNode phi = new PhiNode(local, block.Predecessors.Count); + Operation operation = Operation.Factory.PhiOperation(local, block.Predecessors.Count); - AddPhi(block, phi); + AddPhi(block, operation); globalDefs[block.Index].TryAddOperand(GetId(operand), local); + PhiOperation phi = operation.AsPhi(); + for (int index = 0; index < block.Predecessors.Count; index++) { BasicBlock predecessor = block.Predecessors[index]; @@ -217,19 +214,19 @@ namespace ARMeilleure.Translation return local; } - private static void AddPhi(BasicBlock block, PhiNode phi) + private static void AddPhi(BasicBlock block, Operation phi) { - Node node = block.Operations.First; + Operation node = block.Operations.First; - if (node != null) + if (node != default) { - while (node.ListNext is PhiNode) + while (node.ListNext != default && node.ListNext.Instruction == Instruction.Phi) { node = node.ListNext; } } - if (node is PhiNode) + if (node != default && node.Instruction == Instruction.Phi) { block.Operations.AddAfter(node, phi); } @@ -241,34 +238,37 @@ namespace ARMeilleure.Translation private static bool TryGetId(Operand operand, out int result) { - if (operand is { Kind: OperandKind.Register }) + if (operand != default) { - Register reg = operand.GetRegister(); - - if (reg.Type == RegisterType.Integer) - { - result = reg.Index; - } - else if (reg.Type == RegisterType.Vector) - { - result = RegisterConsts.IntRegsCount + reg.Index; - } - else if (reg.Type == RegisterType.Flag) + if (operand.Kind == OperandKind.Register) { - result = RegisterConsts.IntAndVecRegsCount + reg.Index; + Register reg = operand.GetRegister(); + + if (reg.Type == RegisterType.Integer) + { + result = reg.Index; + } + else if (reg.Type == RegisterType.Vector) + { + result = RegisterConsts.IntRegsCount + reg.Index; + } + else if (reg.Type == RegisterType.Flag) + { + result = RegisterConsts.IntAndVecRegsCount + reg.Index; + } + else /* if (reg.Type == RegisterType.FpFlag) */ + { + result 
= RegisterConsts.FpFlagsOffset + reg.Index; + } + + return true; } - else /* if (reg.Type == RegisterType.FpFlag) */ + else if (operand.Kind == OperandKind.LocalVariable && operand.GetLocalNumber() > 0) { - result = RegisterConsts.FpFlagsOffset + reg.Index; - } + result = RegisterConsts.TotalCount + operand.GetLocalNumber() - 1; - return true; - } - else if (operand is { Kind: OperandKind.LocalVariable } && operand.GetLocalNumber() > 0) - { - result = RegisterConsts.TotalCount + operand.GetLocalNumber() - 1; - - return true; + return true; + } } result = -1; diff --git a/ARMeilleure/Translation/SsaDeconstruction.cs b/ARMeilleure/Translation/SsaDeconstruction.cs index 2e9e3281..cd6bcca1 100644 --- a/ARMeilleure/Translation/SsaDeconstruction.cs +++ b/ARMeilleure/Translation/SsaDeconstruction.cs @@ -1,7 +1,7 @@ using ARMeilleure.IntermediateRepresentation; -using static ARMeilleure.IntermediateRepresentation.OperandHelper; -using static ARMeilleure.IntermediateRepresentation.OperationHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; namespace ARMeilleure.Translation { @@ -11,34 +11,36 @@ namespace ARMeilleure.Translation { for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) { - Node node = block.Operations.First; + Operation operation = block.Operations.First; - while (node is PhiNode phi) + while (operation != default && operation.Instruction == Instruction.Phi) { - Node nextNode = node.ListNext; + Operation nextNode = operation.ListNext; - Operand local = Local(phi.Destination.Type); + Operand local = Local(operation.Destination.Type); + + PhiOperation phi = operation.AsPhi(); for (int index = 0; index < phi.SourcesCount; index++) { - BasicBlock predecessor = phi.GetBlock(index); + BasicBlock predecessor = phi.GetBlock(cfg, index); Operand source = phi.GetSource(index); predecessor.Append(Operation(Instruction.Copy, local, source)); - 
phi.SetSource(index, null); + phi.SetSource(index, default); } - Operation copyOp = Operation(Instruction.Copy, phi.Destination, local); + Operation copyOp = Operation(Instruction.Copy, operation.Destination, local); - block.Operations.AddBefore(node, copyOp); + block.Operations.AddBefore(operation, copyOp); - phi.Destination = null; + operation.Destination = default; - block.Operations.Remove(node); + block.Operations.Remove(operation); - node = nextNode; + operation = nextNode; } } } diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs index 2110a4e3..03ed4c5e 100644 --- a/ARMeilleure/Translation/Translator.cs +++ b/ARMeilleure/Translation/Translator.cs @@ -13,12 +13,8 @@ using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics; -using System.Runtime; using System.Threading; - -using static ARMeilleure.Common.BitMapPool; -using static ARMeilleure.IntermediateRepresentation.OperandHelper; -using static ARMeilleure.IntermediateRepresentation.OperationHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; namespace ARMeilleure.Translation { @@ -193,13 +189,9 @@ namespace ARMeilleure.Translation ClearJitCache(); - DisposePools(); - Stubs.Dispose(); FunctionTable.Dispose(); CountTable.Dispose(); - - GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce; } } @@ -266,8 +258,6 @@ namespace ARMeilleure.Translation Logger.EndPass(PassName.Decoding); - PreparePool(highCq ? 
1 : 0); - Logger.StartPass(PassName.Translation); EmitSynchronization(context); @@ -281,7 +271,7 @@ namespace ARMeilleure.Translation ulong funcSize = funcRange.End - funcRange.Start; - Logger.EndPass(PassName.Translation); + Logger.EndPass(PassName.Translation, cfg); Logger.StartPass(PassName.RegisterUsage); @@ -298,8 +288,6 @@ namespace ARMeilleure.Translation if (!context.HasPtc) { func = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64, options); - - ResetPool(highCq ? 1 : 0); } else { @@ -307,33 +295,16 @@ namespace ARMeilleure.Translation func = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64, options, ptcInfo); - ResetPool(highCq ? 1 : 0); - Hash128 hash = Ptc.ComputeHash(Memory, address, funcSize); Ptc.WriteInfoCodeRelocUnwindInfo(address, funcSize, hash, highCq, ptcInfo); } - return new TranslatedFunction(func, counter, funcSize, highCq); - } + var result = new TranslatedFunction(func, counter, funcSize, highCq); - internal static void PreparePool(int groupId = 0) - { - PrepareOperandPool(groupId); - PrepareOperationPool(groupId); - } - - internal static void ResetPool(int groupId = 0) - { - ResetOperationPool(groupId); - ResetOperandPool(groupId); - } + Allocators.ResetAll(); - internal static void DisposePools() - { - DisposeOperandPools(); - DisposeOperationPools(); - DisposeBitMapPools(); + return result; } private struct Range @@ -408,7 +379,7 @@ namespace ARMeilleure.Translation EmitSynchronization(context); } - Operand lblPredicateSkip = null; + Operand lblPredicateSkip = default; if (opCode is OpCode32 op && op.Cond < Condition.Al) { @@ -426,7 +397,7 @@ namespace ARMeilleure.Translation throw new InvalidOperationException($"Invalid instruction \"{opCode.Instruction.Name}\"."); } - if (lblPredicateSkip != null) + if (lblPredicateSkip != default) { context.MarkLabel(lblPredicateSkip); } diff --git a/ARMeilleure/Translation/TranslatorStubs.cs b/ARMeilleure/Translation/TranslatorStubs.cs index aff2ac7e..48fa3a94 100644 --- 
a/ARMeilleure/Translation/TranslatorStubs.cs +++ b/ARMeilleure/Translation/TranslatorStubs.cs @@ -5,7 +5,7 @@ using ARMeilleure.Translation.Cache; using System; using System.Reflection; using System.Runtime.InteropServices; -using static ARMeilleure.IntermediateRepresentation.OperandHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; namespace ARMeilleure.Translation { @@ -145,7 +145,7 @@ namespace ARMeilleure.Translation Operand masked = context.BitwiseAnd(guestAddress, Const(~_translator.FunctionTable.Mask)); context.BranchIfTrue(lblFallback, masked); - Operand index = null; + Operand index = default; Operand page = Const((long)_translator.FunctionTable.Base); for (int i = 0; i < _translator.FunctionTable.Levels.Length; i++) |
