From 8226997bc7334ef2c29a1dadee72591f6d6037b1 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Wed, 18 Mar 2020 11:44:32 +0000 Subject: CodeGen Optimisations (LSRA and Translator) (#978) * Start of JIT garbage collection improvements - thread static pool for Operand, MemoryOperand, Operation - Operands and Operations are always to be constructed via their static helper classes, so they can be pooled. - removing LinkedList from Node for sources/destinations (replaced with List<>s for now, but probably could do arrays since size is bounded) - removing params constructors from Node - LinkedList<> to List<> with Clear() for Operand assignments/uses - ThreadStaticPool is very simple and basically just exists for the purpose of our specific translation allocation problem. Right now it will stay at the worst case allocation count for that thread (so far) - the pool can never shrink. - Still some cases of Operand[] that haven't been removed yet. Will need to evaluate them (eg. is there a reasonable max number of params for Calls?) * ConcurrentStack instead of ConcurrentQueue for Rejit * Optimize some parts of LSRA - BitMap now operates on 64-bit int rather than 32-bit - BitMap is now pooled in a ThreadStatic pool (within lrsa) - BitMap now is now its own iterator. Marginally speeds up iterating through the bits. - A few cases where enumerators were generated have been converted to forms that generate less garbage. - New data structure for sorting _usePositions in LiveIntervals. Much faster split, NextUseAfter, initial insertion. Random insertion is slightly slower. - That last one is WIP since you need to insert the values backwards. It would be ideal if it just flipped it for you, uncomplicating things on the caller side. * Use a static pool of thread static pools. (yes.) Prevents each execution thread creating its own lowCq pool and making me cry. * Move constant value to top, change naming convention. * Fix iteration of memory operands. * Increase max thread count. * Address Feedback --- .../CodeGen/RegisterAllocators/CopyResolver.cs | 21 +++-- .../CodeGen/RegisterAllocators/HybridAllocator.cs | 11 +-- .../RegisterAllocators/LinearScanAllocator.cs | 98 +++++++++++----------- .../CodeGen/RegisterAllocators/LiveInterval.cs | 36 ++++---- 4 files changed, 89 insertions(+), 77 deletions(-) (limited to 'ARMeilleure/CodeGen/RegisterAllocators') diff --git a/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs b/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs index 65901e80..417f3bae 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs @@ -2,6 +2,9 @@ using ARMeilleure.IntermediateRepresentation; using System; using System.Collections.Generic; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; +using static ARMeilleure.IntermediateRepresentation.OperationHelper; + namespace ARMeilleure.CodeGen.RegisterAllocators { class CopyResolver @@ -133,14 +136,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private static void EmitCopy(List sequence, Operand x, Operand y) { - sequence.Add(new Operation(Instruction.Copy, x, y)); + sequence.Add(Operation(Instruction.Copy, x, y)); } private static void EmitXorSwap(List sequence, Operand x, Operand y) { - sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, x, x, y)); - sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, y, y, x)); - sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, x, x, y)); + sequence.Add(Operation(Instruction.BitwiseExclusiveOr, x, x, y)); + sequence.Add(Operation(Instruction.BitwiseExclusiveOr, y, y, x)); + sequence.Add(Operation(Instruction.BitwiseExclusiveOr, x, x, y)); } } @@ -194,20 +197,20 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { Operand register = GetRegister(right.Register, type); - Operand offset = new Operand(left.SpillOffset); + Operand offset = Const(left.SpillOffset); - _fillQueue.Enqueue(new Operation(Instruction.Fill, register, offset)); + _fillQueue.Enqueue(Operation(Instruction.Fill, register, offset)); HasCopy = true; } private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type) { - Operand offset = new Operand(right.SpillOffset); + Operand offset = Const(right.SpillOffset); Operand register = GetRegister(left.Register, type); - _spillQueue.Enqueue(new Operation(Instruction.Spill, null, offset, register)); + _spillQueue.Enqueue(Operation(Instruction.Spill, null, offset, register)); HasCopy = true; } @@ -240,7 +243,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private static Operand GetRegister(Register reg, OperandType type) { - return new Operand(reg.Index, reg.Type, type); + return Register(reg.Index, reg.Type, type); } } } \ No newline at end of file diff --git a/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs index ce7936f9..21470a66 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs @@ -1,10 +1,11 @@ -using ARMeilleure.Common; using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Translation; using System.Collections.Generic; using System.Diagnostics; +using System.Numerics; using static ARMeilleure.IntermediateRepresentation.OperandHelper; +using static ARMeilleure.IntermediateRepresentation.OperationHelper; namespace ARMeilleure.CodeGen.RegisterAllocators { @@ -265,7 +266,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators node.SetSource(srcIndex, temp); } - Operation fillOp = new Operation(Instruction.Fill, temp, Const(info.SpillOffset)); + Operation fillOp = Operation(Instruction.Fill, temp, Const(info.SpillOffset)); block.Operations.AddBefore(node, fillOp); } @@ -317,7 +318,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators if (info.IsBlockLocal && mask != 0) { - int selectedReg = BitUtils.LowestBitSet(mask); + int selectedReg = BitOperations.TrailingZeroCount(mask); info.Register = selectedReg; @@ -363,7 +364,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators node.SetDestination(dstIndex, temp); - Operation spillOp = new Operation(Instruction.Spill, null, Const(info.SpillOffset), temp); + Operation spillOp = Operation(Instruction.Spill, null, Const(info.SpillOffset), temp); block.Operations.AddAfter(node, spillOp); @@ -415,7 +416,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private static Operand GetSpillTemp(Operand local, int freeMask, ref int useMask) { - int selectedReg = BitUtils.LowestBitSet(freeMask & ~useMask); + int selectedReg = BitOperations.TrailingZeroCount(freeMask & ~useMask); useMask |= 1 << selectedReg; diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs index 1dc6ad73..01bb9554 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; +using System.Numerics; namespace ARMeilleure.CodeGen.RegisterAllocators { @@ -32,7 +33,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private int _operationsCount; - private class AllocationContext + private class AllocationContext : IDisposable { public RegisterMasks Masks { get; } @@ -49,8 +50,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators StackAlloc = stackAlloc; Masks = masks; - Active = new BitMap(intervalsCount); - Inactive = new BitMap(intervalsCount); + Active = BitMapPool.Allocate(intervalsCount); + Inactive = BitMapPool.Allocate(intervalsCount); } public void MoveActiveToInactive(int bit) @@ -69,6 +70,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators dest.Set(bit); } + + public void Dispose() + { + BitMapPool.Release(); + } } public AllocationResult RunPass( @@ -121,10 +127,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators InsertSplitCopies(); InsertSplitCopiesAtEdges(cfg); - return new AllocationResult( + AllocationResult result = new AllocationResult( context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize); + + context.Dispose(); + + return result; } private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex) @@ -618,15 +628,22 @@ namespace ARMeilleure.CodeGen.RegisterAllocators bool hasSingleOrNoSuccessor = block.Next == null || block.Branch == null; - foreach (BasicBlock successor in Successors(block)) + for (int i = 0; i < 2; i++) { + // This used to use an enumerable, but it ended up generating a lot of garbage, so now it is a loop. + BasicBlock successor = (i == 0) ? block.Next : block.Branch; + if (successor == null) + { + continue; + } + int succIndex = successor.Index; // If the current node is a split node, then the actual successor node // (the successor before the split) should be right after it. if (IsSplitEdgeBlock(successor)) { - succIndex = Successors(successor).First().Index; + succIndex = FirstSuccessor(successor).Index; } CopyResolver copyResolver = new CopyResolver(); @@ -699,8 +716,10 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { Operand register = GetRegister(current); - foreach (int usePosition in current.UsePositions()) + IList usePositions = current.UsePositions(); + for (int i = usePositions.Count - 1; i >= 0; i--) { + int usePosition = -usePositions[i]; (_, Node operation) = GetOperationNode(usePosition); for (int index = 0; index < operation.SourcesCount; index++) @@ -743,7 +762,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { Debug.Assert(!interval.IsSpilled, "Spilled intervals are not allowed."); - return new Operand( + return OperandHelper.Register( interval.Register.Index, interval.Register.Type, interval.Local.Type); @@ -778,8 +797,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { _operationNodes.Add((block.Operations, node)); - foreach (Operand dest in Destinations(node)) + for (int i = 0; i < node.DestinationsCount; i++) { + Operand dest = node.GetDestination(i); if (dest.Kind == OperandKind.LocalVariable && visited.Add(dest)) { dest.NumberLocal(_intervals.Count); @@ -815,12 +835,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators // Compute local live sets. for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) { - BitMap liveGen = new BitMap(mapSize); - BitMap liveKill = new BitMap(mapSize); + BitMap liveGen = BitMapPool.Allocate(mapSize); + BitMap liveKill = BitMapPool.Allocate(mapSize); for (Node node = block.Operations.First; node != null; node = node.ListNext) { - foreach (Operand source in Sources(node)) + Sources(node, (source) => { int id = GetOperandId(source); @@ -828,10 +848,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { liveGen.Set(id); } - } + }); - foreach (Operand dest in Destinations(node)) + for (int i = 0; i < node.DestinationsCount; i++) { + Operand dest = node.GetDestination(i); liveKill.Set(GetOperandId(dest)); } } @@ -846,8 +867,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators for (int index = 0; index < cfg.Blocks.Count; index++) { - blkLiveIn [index] = new BitMap(mapSize); - blkLiveOut[index] = new BitMap(mapSize); + blkLiveIn [index] = BitMapPool.Allocate(mapSize); + blkLiveOut[index] = BitMapPool.Allocate(mapSize); } bool modified; @@ -862,12 +883,10 @@ namespace ARMeilleure.CodeGen.RegisterAllocators BitMap liveOut = blkLiveOut[block.Index]; - foreach (BasicBlock successor in Successors(block)) + if ((block.Next != null && liveOut.Set(blkLiveIn[block.Next.Index])) || + (block.Branch != null && liveOut.Set(blkLiveIn[block.Branch.Index]))) { - if (liveOut.Set(blkLiveIn[successor.Index])) - { - modified = true; - } + modified = true; } BitMap liveIn = blkLiveIn[block.Index]; @@ -920,21 +939,22 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { operationPos -= InstructionGap; - foreach (Operand dest in Destinations(node)) + for (int i = 0; i < node.DestinationsCount; i++) { + Operand dest = node.GetDestination(i); LiveInterval interval = _intervals[GetOperandId(dest)]; interval.SetStart(operationPos + 1); interval.AddUsePosition(operationPos + 1); } - foreach (Operand source in Sources(node)) + Sources(node, (source) => { LiveInterval interval = _intervals[GetOperandId(source)]; interval.AddRange(blockStart, operationPos + 1); interval.AddUsePosition(operationPos); - } + }); if (node is Operation operation && operation.Instruction == Instruction.Call) { @@ -949,7 +969,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { while (mask != 0) { - int regIndex = BitUtils.LowestBitSet(mask); + int regIndex = BitOperations.TrailingZeroCount(mask); Register callerSavedReg = new Register(regIndex, regType); @@ -982,17 +1002,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return (register.Index << 1) | (register.Type == RegisterType.Vector ? 1 : 0); } - private static IEnumerable Successors(BasicBlock block) + private static BasicBlock FirstSuccessor(BasicBlock block) { - if (block.Next != null) - { - yield return block.Next; - } - - if (block.Branch != null) - { - yield return block.Branch; - } + return block.Next ?? block.Branch; } private static IEnumerable BottomOperations(BasicBlock block) @@ -1007,15 +1019,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - private static IEnumerable Destinations(Node node) - { - for (int index = 0; index < node.DestinationsCount; index++) - { - yield return node.GetDestination(index); - } - } - - private static IEnumerable Sources(Node node) + private static void Sources(Node node, Action action) { for (int index = 0; index < node.SourcesCount; index++) { @@ -1023,7 +1027,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators if (IsLocalOrRegister(source.Kind)) { - yield return source; + action(source); } else if (source.Kind == OperandKind.Memory) { @@ -1031,12 +1035,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators if (memOp.BaseAddress != null) { - yield return memOp.BaseAddress; + action(memOp.BaseAddress); } if (memOp.Index != null) { - yield return memOp.Index; + action(memOp.Index); } } } diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs index 18858a76..6e786061 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs @@ -1,3 +1,4 @@ +using ARMeilleure.Common; using ARMeilleure.IntermediateRepresentation; using System; using System.Collections.Generic; @@ -12,7 +13,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private LiveInterval _parent; - private SortedSet _usePositions; + private SortedIntegerList _usePositions; public int UsesCount => _usePositions.Count; @@ -38,7 +39,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators Local = local; _parent = parent ?? this; - _usePositions = new SortedSet(); + _usePositions = new SortedIntegerList(); _ranges = new List(); @@ -196,7 +197,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public void AddUsePosition(int position) { - _usePositions.Add(position); + // Inserts are in descending order, but ascending is faster for SortedIntegerList<>. + // We flip the ordering, then iterate backwards when using the final list. + _usePositions.Add(-position); } public bool Overlaps(int position) @@ -247,9 +250,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return _childs.Values; } - public IEnumerable UsePositions() + public IList UsePositions() { - return _usePositions; + return _usePositions.GetList(); } public int FirstUse() @@ -259,20 +262,19 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return NotFound; } - return _usePositions.First(); + return -_usePositions.Last(); } public int NextUseAfter(int position) { - foreach (int usePosition in _usePositions) - { - if (usePosition >= position) - { - return usePosition; - } - } + int index = _usePositions.FindLessEqualIndex(-position); + return (index >= 0) ? -_usePositions[index] : NotFound; + } - return NotFound; + public void RemoveAfter(int position) + { + int index = _usePositions.FindLessEqualIndex(-position); + _usePositions.RemoveRange(0, index + 1); } public LiveInterval Split(int position) @@ -311,12 +313,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators _ranges.RemoveRange(splitIndex, count); } - foreach (int usePosition in _usePositions.Where(x => x >= position)) + int addAfter = _usePositions.FindLessEqualIndex(-position); + for (int index = addAfter; index >= 0; index--) { + int usePosition = _usePositions[index]; right._usePositions.Add(usePosition); } - _usePositions.RemoveWhere(x => x >= position); + RemoveAfter(position); Debug.Assert(_ranges.Count != 0, "Left interval is empty after split."); -- cgit v1.2.3