diff options
Diffstat (limited to 'ChocolArm64/Translation/ALocalAlloc.cs')
| -rw-r--r-- | ChocolArm64/Translation/ALocalAlloc.cs | 231 |
1 files changed, 231 insertions, 0 deletions
diff --git a/ChocolArm64/Translation/ALocalAlloc.cs b/ChocolArm64/Translation/ALocalAlloc.cs new file mode 100644 index 00000000..0661ddc8 --- /dev/null +++ b/ChocolArm64/Translation/ALocalAlloc.cs @@ -0,0 +1,231 @@ +using System.Collections.Generic; + +namespace ChocolArm64.Translation +{ + class ALocalAlloc + { + private class PathIo + { + private Dictionary<AILBlock, long> AllInputs; + private Dictionary<AILBlock, long> CmnOutputs; + + private long AllOutputs; + + public PathIo() + { + AllInputs = new Dictionary<AILBlock, long>(); + CmnOutputs = new Dictionary<AILBlock, long>(); + } + + public PathIo(AILBlock Root, long Inputs, long Outputs) : this() + { + Set(Root, Inputs, Outputs); + } + + public void Set(AILBlock Root, long Inputs, long Outputs) + { + if (!AllInputs.TryAdd(Root, Inputs)) + { + AllInputs[Root] |= Inputs; + } + + if (!CmnOutputs.TryAdd(Root, Outputs)) + { + CmnOutputs[Root] &= Outputs; + } + + AllOutputs |= Outputs; + } + + public long GetInputs(AILBlock Root) + { + if (AllInputs.TryGetValue(Root, out long Inputs)) + { + return Inputs | (AllOutputs & ~CmnOutputs[Root]); + } + + return 0; + } + + public long GetOutputs() + { + return AllOutputs; + } + } + + private Dictionary<AILBlock, PathIo> IntPaths; + private Dictionary<AILBlock, PathIo> VecPaths; + + private struct BlockIo + { + public AILBlock Block; + public AILBlock Entry; + + public long IntInputs; + public long VecInputs; + public long IntOutputs; + public long VecOutputs; + } + + private const int MaxOptGraphLength = 120; + + public ALocalAlloc(AILBlock[] Graph, AILBlock Root) + { + IntPaths = new Dictionary<AILBlock, PathIo>(); + VecPaths = new Dictionary<AILBlock, PathIo>(); + + if (Graph.Length < MaxOptGraphLength) + { + InitializeOptimal(Graph, Root); + } + else + { + InitializeFast(Graph); + } + } + + private void InitializeOptimal(AILBlock[] Graph, AILBlock Root) + { + //This will go through all possible paths on the graph, + //and store all inputs/outputs for each block. A register + //that was previously written to already is not considered an input. + //When a block can be reached by more than one path, then the + //output from all paths needs to be set for this block, and + //only outputs present in all of the parent blocks can be considered + //when doing input elimination. Each block chain have a root, that's where + //the code starts executing. They are present on the subroutine start point, + //and on call return points too (address written to X30 by BL). + HashSet<BlockIo> Visited = new HashSet<BlockIo>(); + + Queue<BlockIo> Unvisited = new Queue<BlockIo>(); + + void Enqueue(BlockIo Block) + { + if (!Visited.Contains(Block)) + { + Unvisited.Enqueue(Block); + + Visited.Add(Block); + } + } + + Enqueue(new BlockIo() + { + Block = Root, + Entry = Root + }); + + while (Unvisited.Count > 0) + { + BlockIo Current = Unvisited.Dequeue(); + + Current.IntInputs |= Current.Block.IntInputs & ~Current.IntOutputs; + Current.VecInputs |= Current.Block.VecInputs & ~Current.VecOutputs; + Current.IntOutputs |= Current.Block.IntOutputs; + Current.VecOutputs |= Current.Block.VecOutputs; + + //Check if this is a exit block + //(a block that returns or calls another sub). + if ((Current.Block.Next == null && + Current.Block.Branch == null) || Current.Block.HasStateStore) + { + if (!IntPaths.TryGetValue(Current.Block, out PathIo IntPath)) + { + IntPaths.Add(Current.Block, IntPath = new PathIo()); + } + + if (!VecPaths.TryGetValue(Current.Block, out PathIo VecPath)) + { + VecPaths.Add(Current.Block, VecPath = new PathIo()); + } + + IntPath.Set(Current.Entry, Current.IntInputs, Current.IntOutputs); + VecPath.Set(Current.Entry, Current.VecInputs, Current.VecOutputs); + } + + void EnqueueFromCurrent(AILBlock Block, bool RetTarget) + { + BlockIo BlkIO = new BlockIo() { Block = Block }; + + if (RetTarget) + { + BlkIO.Entry = Block; + BlkIO.IntInputs = 0; + BlkIO.VecInputs = 0; + BlkIO.IntOutputs = 0; + BlkIO.VecOutputs = 0; + } + else + { + BlkIO.Entry = Current.Entry; + BlkIO.IntInputs = Current.IntInputs; + BlkIO.VecInputs = Current.VecInputs; + BlkIO.IntOutputs = Current.IntOutputs; + BlkIO.VecOutputs = Current.VecOutputs; + } + + Enqueue(BlkIO); + } + + if (Current.Block.Next != null) + { + EnqueueFromCurrent(Current.Block.Next, Current.Block.HasStateStore); + } + + if (Current.Block.Branch != null) + { + EnqueueFromCurrent(Current.Block.Branch, false); + } + } + } + + private void InitializeFast(AILBlock[] Graph) + { + //This is WAY faster than InitializeOptimal, but results in + //uneeded loads and stores, so the resulting code will be slower. + long IntInputs = 0; + long IntOutputs = 0; + long VecInputs = 0; + long VecOutputs = 0; + + foreach (AILBlock Block in Graph) + { + IntInputs |= Block.IntInputs; + IntOutputs |= Block.IntOutputs; + VecInputs |= Block.VecInputs; + VecOutputs |= Block.VecOutputs; + } + + //It's possible that not all code paths writes to those output registers, + //in those cases if we attempt to write an output registers that was + //not written, we will be just writing zero and messing up the old register value. + //So we just need to ensure that all outputs are loaded. + IntInputs |= IntOutputs; + VecInputs |= VecOutputs; + + foreach (AILBlock Block in Graph) + { + IntPaths.Add(Block, new PathIo(Block, IntInputs, IntOutputs)); + VecPaths.Add(Block, new PathIo(Block, VecInputs, VecOutputs)); + } + } + + public long GetIntInputs(AILBlock Root) => GetInputsImpl(Root, IntPaths.Values); + public long GetVecInputs(AILBlock Root) => GetInputsImpl(Root, VecPaths.Values); + + private long GetInputsImpl(AILBlock Root, IEnumerable<PathIo> Values) + { + long Inputs = 0; + + foreach (PathIo Path in Values) + { + Inputs |= Path.GetInputs(Root); + } + + return Inputs; + } + + public long GetIntOutputs(AILBlock Block) => IntPaths[Block].GetOutputs(); + public long GetVecOutputs(AILBlock Block) => VecPaths[Block].GetOutputs(); + } +}
\ No newline at end of file |
