aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure/Common
diff options
context:
space:
mode:
authorriperiperi <rhy3756547@hotmail.com>2020-03-18 11:44:32 +0000
committerGitHub <noreply@github.com>2020-03-18 22:44:32 +1100
commit8226997bc7334ef2c29a1dadee72591f6d6037b1 (patch)
treef95b9aa233bbbef2f5288fb29c4c89cf8738ef74 /ARMeilleure/Common
parent7475e180b4344fa2cf60243d8257304871fad24a (diff)
CodeGen Optimisations (LSRA and Translator) (#978)
* Start of JIT garbage collection improvements - thread static pool for Operand, MemoryOperand, Operation - Operands and Operations are always to be constructed via their static helper classes, so they can be pooled. - removing LinkedList from Node for sources/destinations (replaced with List<>s for now, but probably could do arrays since size is bounded) - removing params constructors from Node - LinkedList<> to List<> with Clear() for Operand assignments/uses - ThreadStaticPool is very simple and basically just exists for the purpose of our specific translation allocation problem. Right now it will stay at the worst case allocation count for that thread (so far) - the pool can never shrink. - Still some cases of Operand[] that haven't been removed yet. Will need to evaluate them (eg. is there a reasonable max number of params for Calls?) * ConcurrentStack instead of ConcurrentQueue for Rejit * Optimize some parts of LSRA - BitMap now operates on 64-bit int rather than 32-bit - BitMap is now pooled in a ThreadStatic pool (within lrsa) - BitMap now is now its own iterator. Marginally speeds up iterating through the bits. - A few cases where enumerators were generated have been converted to forms that generate less garbage. - New data structure for sorting _usePositions in LiveIntervals. Much faster split, NextUseAfter, initial insertion. Random insertion is slightly slower. - That last one is WIP since you need to insert the values backwards. It would be ideal if it just flipped it for you, uncomplicating things on the caller side. * Use a static pool of thread static pools. (yes.) Prevents each execution thread creating its own lowCq pool and making me cry. * Move constant value to top, change naming convention. * Fix iteration of memory operands. * Increase max thread count. * Address Feedback
Diffstat (limited to 'ARMeilleure/Common')
-rw-r--r--ARMeilleure/Common/BitMap.cs87
-rw-r--r--ARMeilleure/Common/BitMapPool.cs19
-rw-r--r--ARMeilleure/Common/BitUtils.cs42
-rw-r--r--ARMeilleure/Common/SortedIntegerList.cs73
-rw-r--r--ARMeilleure/Common/ThreadStaticPool.cs104
5 files changed, 266 insertions, 59 deletions
diff --git a/ARMeilleure/Common/BitMap.cs b/ARMeilleure/Common/BitMap.cs
index 9dff271b..35100536 100644
--- a/ARMeilleure/Common/BitMap.cs
+++ b/ARMeilleure/Common/BitMap.cs
@@ -1,20 +1,50 @@
using System.Collections;
using System.Collections.Generic;
+using System.Numerics;
namespace ARMeilleure.Common
{
- class BitMap : IEnumerable<int>
+ class BitMap : IEnumerator<int>
{
- private const int IntSize = 32;
+ private const int IntSize = 64;
private const int IntMask = IntSize - 1;
- private List<int> _masks;
+ private List<long> _masks;
+
+ private int _enumIndex;
+ private long _enumMask;
+ private int _enumBit;
+
+ public int Current => _enumIndex * IntSize + _enumBit;
+ object IEnumerator.Current => Current;
+
+ public BitMap()
+ {
+ _masks = new List<long>(0);
+ }
public BitMap(int initialCapacity)
{
int count = (initialCapacity + IntMask) / IntSize;
- _masks = new List<int>(count);
+ _masks = new List<long>(count);
+
+ while (count-- > 0)
+ {
+ _masks.Add(0);
+ }
+ }
+
+ public void Reset(int initialCapacity)
+ {
+ int count = (initialCapacity + IntMask) / IntSize;
+
+ if (count > _masks.Capacity)
+ {
+ _masks.Capacity = count;
+ }
+
+ _masks.Clear();
while (count-- > 0)
{
@@ -29,7 +59,7 @@ namespace ARMeilleure.Common
int wordIndex = bit / IntSize;
int wordBit = bit & IntMask;
- int wordMask = 1 << wordBit;
+ long wordMask = 1L << wordBit;
if ((_masks[wordIndex] & wordMask) != 0)
{
@@ -48,7 +78,7 @@ namespace ARMeilleure.Common
int wordIndex = bit / IntSize;
int wordBit = bit & IntMask;
- int wordMask = 1 << wordBit;
+ long wordMask = 1L << wordBit;
_masks[wordIndex] &= ~wordMask;
}
@@ -60,7 +90,7 @@ namespace ARMeilleure.Common
int wordIndex = bit / IntSize;
int wordBit = bit & IntMask;
- return (_masks[wordIndex] & (1 << wordBit)) != 0;
+ return (_masks[wordIndex] & (1L << wordBit)) != 0;
}
public bool Set(BitMap map)
@@ -71,7 +101,7 @@ namespace ARMeilleure.Common
for (int index = 0; index < _masks.Count; index++)
{
- int newValue = _masks[index] | map._masks[index];
+ long newValue = _masks[index] | map._masks[index];
if (_masks[index] != newValue)
{
@@ -92,7 +122,7 @@ namespace ARMeilleure.Common
for (int index = 0; index < _masks.Count; index++)
{
- int newValue = _masks[index] & ~map._masks[index];
+ long newValue = _masks[index] & ~map._masks[index];
if (_masks[index] != newValue)
{
@@ -105,6 +135,10 @@ namespace ARMeilleure.Common
return modified;
}
+ #region IEnumerable<long> Methods
+
+ // Note: The bit enumerator is embedded in this class to avoid creating garbage when enumerating.
+
private void EnsureCapacity(int size)
{
while (_masks.Count * IntSize < size)
@@ -115,24 +149,37 @@ namespace ARMeilleure.Common
public IEnumerator<int> GetEnumerator()
{
- for (int index = 0; index < _masks.Count; index++)
- {
- int mask = _masks[index];
+ Reset();
+ return this;
+ }
- while (mask != 0)
+ public bool MoveNext()
+ {
+ if (_enumMask != 0)
+ {
+ _enumMask &= ~(1L << _enumBit);
+ }
+ while (_enumMask == 0)
+ {
+ if (++_enumIndex >= _masks.Count)
{
- int bit = BitUtils.LowestBitSet(mask);
-
- mask &= ~(1 << bit);
-
- yield return index * IntSize + bit;
+ return false;
}
+ _enumMask = _masks[_enumIndex];
}
+ _enumBit = BitOperations.TrailingZeroCount(_enumMask);
+ return true;
}
- IEnumerator IEnumerable.GetEnumerator()
+ public void Reset()
{
- return GetEnumerator();
+ _enumIndex = -1;
+ _enumMask = 0;
+ _enumBit = 0;
}
+
+ public void Dispose() { }
+
+#endregion
}
} \ No newline at end of file
diff --git a/ARMeilleure/Common/BitMapPool.cs b/ARMeilleure/Common/BitMapPool.cs
new file mode 100644
index 00000000..caba2317
--- /dev/null
+++ b/ARMeilleure/Common/BitMapPool.cs
@@ -0,0 +1,19 @@
+using System;
+
+namespace ARMeilleure.Common
+{
+ static class BitMapPool
+ {
+ public static BitMap Allocate(int initialCapacity)
+ {
+ BitMap result = ThreadStaticPool<BitMap>.Instance.Allocate();
+ result.Reset(initialCapacity);
+ return result;
+ }
+
+ public static void Release()
+ {
+ ThreadStaticPool<BitMap>.Instance.Clear();
+ }
+ }
+}
diff --git a/ARMeilleure/Common/BitUtils.cs b/ARMeilleure/Common/BitUtils.cs
index 7a29dcff..00fc6e5b 100644
--- a/ARMeilleure/Common/BitUtils.cs
+++ b/ARMeilleure/Common/BitUtils.cs
@@ -1,24 +1,13 @@
+using System.Numerics;
+
namespace ARMeilleure.Common
{
static class BitUtils
{
- private const int DeBrujinSequence = 0x77cb531;
-
- private static readonly int[] DeBrujinLbsLut;
-
private static readonly sbyte[] HbsNibbleLut;
static BitUtils()
{
- DeBrujinLbsLut = new int[32];
-
- for (int index = 0; index < DeBrujinLbsLut.Length; index++)
- {
- uint lutIndex = (uint)(DeBrujinSequence * (1 << index)) >> 27;
-
- DeBrujinLbsLut[lutIndex] = index;
- }
-
HbsNibbleLut = new sbyte[] { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
}
@@ -43,20 +32,7 @@ namespace ARMeilleure.Common
public static int HighestBitSet(int value)
{
- if (value == 0)
- {
- return -1;
- }
-
- for (int bit = 31; bit >= 0; bit--)
- {
- if (((value >> bit) & 1) != 0)
- {
- return bit;
- }
- }
-
- return -1;
+ return 31 - BitOperations.LeadingZeroCount((uint)value);
}
public static int HighestBitSetNibble(int value)
@@ -64,18 +40,6 @@ namespace ARMeilleure.Common
return HbsNibbleLut[value];
}
- public static int LowestBitSet(int value)
- {
- if (value == 0)
- {
- return -1;
- }
-
- int lsb = value & -value;
-
- return DeBrujinLbsLut[(uint)(DeBrujinSequence * lsb) >> 27];
- }
-
public static long Replicate(long bits, int size)
{
long output = 0;
diff --git a/ARMeilleure/Common/SortedIntegerList.cs b/ARMeilleure/Common/SortedIntegerList.cs
new file mode 100644
index 00000000..cceab62b
--- /dev/null
+++ b/ARMeilleure/Common/SortedIntegerList.cs
@@ -0,0 +1,73 @@
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Common
+{
+ public class SortedIntegerList
+ {
+ private List<int> _items;
+
+ public int Count => _items.Count;
+
+ public int this[int index]
+ {
+ get
+ {
+ return _items[index];
+ }
+ set
+ {
+ _items[index] = value;
+ }
+ }
+
+ public SortedIntegerList()
+ {
+ _items = new List<int>();
+ }
+
+ public bool Add(int value)
+ {
+ if (_items.Count == 0 || value > Last())
+ {
+ _items.Add(value);
+ return true;
+ }
+ else
+ {
+ int index = _items.BinarySearch(value);
+ if (index >= 0)
+ {
+ return false;
+ }
+
+ _items.Insert(-1 - index, value);
+ return true;
+ }
+ }
+
+ public int FindLessEqualIndex(int value)
+ {
+ int index = _items.BinarySearch(value);
+ return (index < 0) ? (-2 - index) : index;
+ }
+
+ public void RemoveRange(int index, int count)
+ {
+ if (count > 0)
+ {
+ _items.RemoveRange(index, count);
+ }
+ }
+
+ public int Last()
+ {
+ return _items[Count - 1];
+ }
+
+ public List<int> GetList()
+ {
+ return _items;
+ }
+ }
+}
diff --git a/ARMeilleure/Common/ThreadStaticPool.cs b/ARMeilleure/Common/ThreadStaticPool.cs
new file mode 100644
index 00000000..cf3a7bb4
--- /dev/null
+++ b/ARMeilleure/Common/ThreadStaticPool.cs
@@ -0,0 +1,104 @@
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Threading;
+
+namespace ARMeilleure.Common
+{
+ internal class ThreadStaticPool<T> where T : class, new()
+ {
+ private const int PoolSizeIncrement = 200;
+
+ [ThreadStatic]
+ private static ThreadStaticPool<T> _instance;
+ public static ThreadStaticPool<T> Instance
+ {
+ get
+ {
+ if (_instance == null)
+ {
+ PreparePool(0); // So that we can still use a pool when blindly initializing one.
+ }
+ return _instance;
+ }
+ }
+
+ private static ConcurrentDictionary<int, Stack<ThreadStaticPool<T>>> _pools = new ConcurrentDictionary<int, Stack<ThreadStaticPool<T>>>();
+
+ private static Stack<ThreadStaticPool<T>> GetPools(int groupId)
+ {
+ return _pools.GetOrAdd(groupId, x => new Stack<ThreadStaticPool<T>>());
+ }
+
+ public static void PreparePool(int groupId)
+ {
+ // Prepare the pool for this thread, ideally using an existing one from the specified group.
+ if (_instance == null)
+ {
+ Stack<ThreadStaticPool<T>> pools = GetPools(groupId);
+ lock (pools)
+ {
+ _instance = (pools.Count != 0) ? pools.Pop() : new ThreadStaticPool<T>(PoolSizeIncrement * 2);
+ }
+ }
+ }
+
+ public static void ReturnPool(int groupId)
+ {
+ // Reset and return the pool for this thread to the specified group.
+ Stack<ThreadStaticPool<T>> pools = GetPools(groupId);
+ lock (pools)
+ {
+ _instance.Clear();
+ pools.Push(_instance);
+ _instance = null;
+ }
+ }
+
+ private T[] _pool;
+ private int _poolUsed = -1;
+ private int _poolSize;
+
+ public ThreadStaticPool(int initialSize)
+ {
+ _pool = new T[initialSize];
+
+ for (int i = 0; i < initialSize; i++)
+ {
+ _pool[i] = new T();
+ }
+
+ _poolSize = initialSize;
+ }
+
+ public T Allocate()
+ {
+ int index = Interlocked.Increment(ref _poolUsed);
+ if (index >= _poolSize)
+ {
+ IncreaseSize();
+ }
+ return _pool[index];
+ }
+
+ private void IncreaseSize()
+ {
+ _poolSize += PoolSizeIncrement;
+
+ T[] newArray = new T[_poolSize];
+ Array.Copy(_pool, 0, newArray, 0, _pool.Length);
+
+ for (int i = _pool.Length; i < _poolSize; i++)
+ {
+ newArray[i] = new T();
+ }
+
+ Interlocked.Exchange(ref _pool, newArray);
+ }
+
+ public void Clear()
+ {
+ _poolUsed = -1;
+ }
+ }
+}