aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure/Translation
diff options
context:
space:
mode:
authorriperiperi <rhy3756547@hotmail.com>2020-03-18 11:44:32 +0000
committerGitHub <noreply@github.com>2020-03-18 22:44:32 +1100
commit8226997bc7334ef2c29a1dadee72591f6d6037b1 (patch)
treef95b9aa233bbbef2f5288fb29c4c89cf8738ef74 /ARMeilleure/Translation
parent7475e180b4344fa2cf60243d8257304871fad24a (diff)
CodeGen Optimisations (LSRA and Translator) (#978)
* Start of JIT garbage collection improvements - thread static pool for Operand, MemoryOperand, Operation - Operands and Operations are always to be constructed via their static helper classes, so they can be pooled. - removing LinkedList from Node for sources/destinations (replaced with List<>s for now, but probably could do arrays since size is bounded) - removing params constructors from Node - LinkedList<> to List<> with Clear() for Operand assignments/uses - ThreadStaticPool is very simple and basically just exists for the purpose of our specific translation allocation problem. Right now it will stay at the worst case allocation count for that thread (so far) - the pool can never shrink. - Still some cases of Operand[] that haven't been removed yet. Will need to evaluate them (e.g. is there a reasonable max number of params for Calls?) * ConcurrentStack instead of ConcurrentQueue for Rejit * Optimize some parts of LSRA - BitMap now operates on 64-bit int rather than 32-bit - BitMap is now pooled in a ThreadStatic pool (within LSRA) - BitMap is now its own iterator. Marginally speeds up iterating through the bits. - A few cases where enumerators were generated have been converted to forms that generate less garbage. - New data structure for sorting _usePositions in LiveIntervals. Much faster split, NextUseAfter, initial insertion. Random insertion is slightly slower. - That last one is WIP since you need to insert the values backwards. It would be ideal if it just flipped it for you, uncomplicating things on the caller side. * Use a static pool of thread static pools. (yes.) Prevents each execution thread creating its own lowCq pool and making me cry. * Move constant value to top, change naming convention. * Fix iteration of memory operands. * Increase max thread count. * Address Feedback
Diffstat (limited to 'ARMeilleure/Translation')
-rw-r--r--ARMeilleure/Translation/ControlFlowGraph.cs2
-rw-r--r--ARMeilleure/Translation/EmitterContext.cs53
-rw-r--r--ARMeilleure/Translation/PriorityQueue.cs10
-rw-r--r--ARMeilleure/Translation/RegisterUsage.cs19
-rw-r--r--ARMeilleure/Translation/SsaDeconstruction.cs5
-rw-r--r--ARMeilleure/Translation/Translator.cs8
6 files changed, 77 insertions, 20 deletions
diff --git a/ARMeilleure/Translation/ControlFlowGraph.cs b/ARMeilleure/Translation/ControlFlowGraph.cs
index 37613eb4..16b406ab 100644
--- a/ARMeilleure/Translation/ControlFlowGraph.cs
+++ b/ARMeilleure/Translation/ControlFlowGraph.cs
@@ -141,7 +141,7 @@ namespace ARMeilleure.Translation
splitBlock2.Branch = successor;
- splitBlock2.Operations.AddLast(new Operation(Instruction.Branch, null));
+ splitBlock2.Operations.AddLast(OperationHelper.Operation(Instruction.Branch, null));
Blocks.AddBefore(successor, splitBlock2);
}
diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs
index a11d25a6..d9e0a4ed 100644
--- a/ARMeilleure/Translation/EmitterContext.cs
+++ b/ARMeilleure/Translation/EmitterContext.cs
@@ -459,14 +459,63 @@ namespace ARMeilleure.Translation
return Add(Instruction.ZeroExtend8, Local(type), op1);
}
- private Operand Add(Instruction inst, Operand dest = null, params Operand[] sources)
+ private void NewNextBlockIfNeeded()
{
if (_needsNewBlock)
{
NewNextBlock();
}
+ }
+
+ private Operand Add(Instruction inst, Operand dest = null)
+ {
+ NewNextBlockIfNeeded();
+
+ Operation operation = OperationHelper.Operation(inst, dest);
+
+ _irBlock.Operations.AddLast(operation);
+
+ return dest;
+ }
+
+ private Operand Add(Instruction inst, Operand dest, Operand[] sources)
+ {
+ NewNextBlockIfNeeded();
+
+ Operation operation = OperationHelper.Operation(inst, dest, sources);
+
+ _irBlock.Operations.AddLast(operation);
+
+ return dest;
+ }
+
+ private Operand Add(Instruction inst, Operand dest, Operand source0)
+ {
+ NewNextBlockIfNeeded();
+
+ Operation operation = OperationHelper.Operation(inst, dest, source0);
+
+ _irBlock.Operations.AddLast(operation);
+
+ return dest;
+ }
+
+ private Operand Add(Instruction inst, Operand dest, Operand source0, Operand source1)
+ {
+ NewNextBlockIfNeeded();
+
+ Operation operation = OperationHelper.Operation(inst, dest, source0, source1);
+
+ _irBlock.Operations.AddLast(operation);
+
+ return dest;
+ }
+
+ private Operand Add(Instruction inst, Operand dest, Operand source0, Operand source1, Operand source2)
+ {
+ NewNextBlockIfNeeded();
- Operation operation = new Operation(inst, dest, sources);
+ Operation operation = OperationHelper.Operation(inst, dest, source0, source1, source2);
_irBlock.Operations.AddLast(operation);
diff --git a/ARMeilleure/Translation/PriorityQueue.cs b/ARMeilleure/Translation/PriorityQueue.cs
index ab593dc0..000a5009 100644
--- a/ARMeilleure/Translation/PriorityQueue.cs
+++ b/ARMeilleure/Translation/PriorityQueue.cs
@@ -4,28 +4,28 @@ namespace ARMeilleure.Translation
{
class PriorityQueue<T>
{
- private ConcurrentQueue<T>[] _queues;
+ private ConcurrentStack<T>[] _queues;
public PriorityQueue(int priorities)
{
- _queues = new ConcurrentQueue<T>[priorities];
+ _queues = new ConcurrentStack<T>[priorities];
for (int index = 0; index < priorities; index++)
{
- _queues[index] = new ConcurrentQueue<T>();
+ _queues[index] = new ConcurrentStack<T>();
}
}
public void Enqueue(int priority, T value)
{
- _queues[priority].Enqueue(value);
+ _queues[priority].Push(value);
}
public bool TryDequeue(out T value)
{
for (int index = 0; index < _queues.Length; index++)
{
- if (_queues[index].TryDequeue(out value))
+ if (_queues[index].TryPop(out value))
{
return true;
}
diff --git a/ARMeilleure/Translation/RegisterUsage.cs b/ARMeilleure/Translation/RegisterUsage.cs
index 84dfce7b..d5124285 100644
--- a/ARMeilleure/Translation/RegisterUsage.cs
+++ b/ARMeilleure/Translation/RegisterUsage.cs
@@ -3,6 +3,7 @@ using ARMeilleure.State;
using System;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+using static ARMeilleure.IntermediateRepresentation.OperationHelper;
namespace ARMeilleure.Translation
{
@@ -299,16 +300,16 @@ namespace ARMeilleure.Translation
Operand addr = Local(OperandType.I64);
- Operation loadOp = new Operation(Instruction.Load, dest, addr);
+ Operation loadOp = Operation(Instruction.Load, dest, addr);
block.Operations.AddFirst(loadOp);
- Operation calcOffsOp = new Operation(Instruction.Add, addr, arg0, Const(offset));
+ Operation calcOffsOp = Operation(Instruction.Add, addr, arg0, Const(offset));
block.Operations.AddFirst(calcOffsOp);
}
- Operation loadArg0 = new Operation(Instruction.LoadArgument, arg0, Const(0));
+ Operation loadArg0 = Operation(Instruction.LoadArgument, arg0, Const(0));
block.Operations.AddFirst(loadArg0);
}
@@ -329,7 +330,7 @@ namespace ARMeilleure.Translation
Operand arg0 = Local(OperandType.I64);
- Operation loadArg0 = new Operation(Instruction.LoadArgument, arg0, Const(0));
+ Operation loadArg0 = Operation(Instruction.LoadArgument, arg0, Const(0));
block.Append(loadArg0);
@@ -348,11 +349,11 @@ namespace ARMeilleure.Translation
Operand addr = Local(OperandType.I64);
- Operation calcOffsOp = new Operation(Instruction.Add, addr, arg0, Const(offset));
+ Operation calcOffsOp = Operation(Instruction.Add, addr, arg0, Const(offset));
block.Append(calcOffsOp);
- Operation storeOp = new Operation(Instruction.Store, null, addr, source);
+ Operation storeOp = Operation(Instruction.Store, null, addr, source);
block.Append(storeOp);
}
@@ -362,15 +363,15 @@ namespace ARMeilleure.Translation
{
if (bit < RegsCount)
{
- return new Operand(bit, baseType, GetOperandType(baseType, mode));
+ return OperandHelper.Register(bit, baseType, GetOperandType(baseType, mode));
}
else if (baseType == RegisterType.Integer)
{
- return new Operand(bit & RegsMask, RegisterType.Flag, OperandType.I32);
+ return OperandHelper.Register(bit & RegsMask, RegisterType.Flag, OperandType.I32);
}
else if (baseType == RegisterType.Vector)
{
- return new Operand(bit & RegsMask, RegisterType.FpFlag, OperandType.I32);
+ return OperandHelper.Register(bit & RegsMask, RegisterType.FpFlag, OperandType.I32);
}
else
{
diff --git a/ARMeilleure/Translation/SsaDeconstruction.cs b/ARMeilleure/Translation/SsaDeconstruction.cs
index 37d61625..c3bcaf8c 100644
--- a/ARMeilleure/Translation/SsaDeconstruction.cs
+++ b/ARMeilleure/Translation/SsaDeconstruction.cs
@@ -2,6 +2,7 @@ using ARMeilleure.IntermediateRepresentation;
using System.Collections.Generic;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+using static ARMeilleure.IntermediateRepresentation.OperationHelper;
namespace ARMeilleure.Translation
{
@@ -25,12 +26,12 @@ namespace ARMeilleure.Translation
Operand source = phi.GetSource(index);
- predecessor.Append(new Operation(Instruction.Copy, local, source));
+ predecessor.Append(Operation(Instruction.Copy, local, source));
phi.SetSource(index, null);
}
- Operation copyOp = new Operation(Instruction.Copy, phi.Destination, local);
+ Operation copyOp = Operation(Instruction.Copy, phi.Destination, local);
block.Operations.AddBefore(node, copyOp);
diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs
index 9d534d58..053c7328 100644
--- a/ARMeilleure/Translation/Translator.cs
+++ b/ARMeilleure/Translation/Translator.cs
@@ -73,7 +73,7 @@ namespace ARMeilleure.Translation
// If we only have one rejit thread, it should be normal priority as highCq code is performance critical.
// TODO: Use physical cores rather than logical. This only really makes sense for processors with hyperthreading. Requires OS specific code.
int unboundedThreadCount = Math.Max(1, (Environment.ProcessorCount - 6) / 3);
- int threadCount = Math.Min(3, unboundedThreadCount);
+ int threadCount = Math.Min(4, unboundedThreadCount);
for (int i = 0; i < threadCount; i++)
{
bool last = i != 0 && i == unboundedThreadCount - 1;
@@ -146,6 +146,9 @@ namespace ARMeilleure.Translation
{
ArmEmitterContext context = new ArmEmitterContext(_memory, _jumpTable, (long)address, highCq, Aarch32Mode.User);
+ OperandHelper.PrepareOperandPool(highCq);
+ OperationHelper.PrepareOperationPool(highCq);
+
Logger.StartPass(PassName.Decoding);
Block[] blocks = AlwaysTranslateFunctions
@@ -181,6 +184,9 @@ namespace ARMeilleure.Translation
GuestFunction func = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64, options);
+ OperandHelper.ResetOperandPool(highCq);
+ OperationHelper.ResetOperationPool(highCq);
+
return new TranslatedFunction(func, rejit: !highCq);
}