diff options
| author | TSR Berry <20988865+TSRBerry@users.noreply.github.com> | 2023-04-08 01:22:00 +0200 |
|---|---|---|
| committer | Mary <thog@protonmail.com> | 2023-04-27 23:51:14 +0200 |
| commit | cee712105850ac3385cd0091a923438167433f9f (patch) | |
| tree | 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /Ryujinx.Graphics.Texture/Encoders | |
| parent | cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff) | |
Move solution and projects to src
Diffstat (limited to 'Ryujinx.Graphics.Texture/Encoders')
| -rw-r--r-- | Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs | 1005 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Texture/Encoders/EncodeMode.cs | 10 |
2 files changed, 0 insertions, 1015 deletions
diff --git a/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs b/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs deleted file mode 100644 index 35d36bce..00000000 --- a/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs +++ /dev/null @@ -1,1005 +0,0 @@ -using Ryujinx.Graphics.Texture.Utils; -using System; -using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using System.Threading.Tasks; - -namespace Ryujinx.Graphics.Texture.Encoders -{ - static class BC7Encoder - { - private const int MinColorVarianceForModeChange = 160; - - public static void Encode(Memory<byte> outputStorage, ReadOnlyMemory<byte> data, int width, int height, EncodeMode mode) - { - int widthInBlocks = (width + 3) / 4; - int heightInBlocks = (height + 3) / 4; - - bool fastMode = (mode & EncodeMode.ModeMask) == EncodeMode.Fast; - - if (mode.HasFlag(EncodeMode.Multithreaded)) - { - Parallel.For(0, heightInBlocks, (yInBlocks) => - { - Span<ulong> output = MemoryMarshal.Cast<byte, ulong>(outputStorage.Span); - int y = yInBlocks * 4; - - for (int xInBlocks = 0; xInBlocks < widthInBlocks; xInBlocks++) - { - int x = xInBlocks * 4; - Block block = CompressBlock(data.Span, x, y, width, height, fastMode); - - int offset = (yInBlocks * widthInBlocks + xInBlocks) * 2; - output[offset] = block.Low; - output[offset + 1] = block.High; - } - }); - } - else - { - Span<ulong> output = MemoryMarshal.Cast<byte, ulong>(outputStorage.Span); - int offset = 0; - - for (int y = 0; y < height; y += 4) - { - for (int x = 0; x < width; x += 4) - { - Block block = CompressBlock(data.Span, x, y, width, height, fastMode); - - output[offset++] = block.Low; - output[offset++] = block.High; - } - } - } - } - - private static readonly int[] _mostFrequentPartitions = new int[] - { - 0, 13, 2, 1, 15, 14, 10, 23 - }; - - private static Block CompressBlock(ReadOnlySpan<byte> data, int x, int y, int width, int height, bool fastMode) - { - int w = Math.Min(4, width - x); - int h = Math.Min(4, height - y); - - var dataUint = MemoryMarshal.Cast<byte, uint>(data); - - int baseOffset = y * width + x; - - Span<uint> tile = stackalloc uint[w * h]; - - for (int ty = 0; ty < h; ty++) - { - int rowOffset = baseOffset + ty * width; - - for (int tx = 0; tx < w; tx++) - { - tile[ty * w + tx] = dataUint[rowOffset + tx]; - } - } - - return fastMode ? EncodeFast(tile, w, h) : EncodeExhaustive(tile, w, h); - } - - private static Block EncodeFast(ReadOnlySpan<uint> tile, int w, int h) - { - (RgbaColor8 minColor, RgbaColor8 maxColor) = BC67Utils.GetMinMaxColors(tile, w, h); - - bool alphaNotOne = minColor.A != 255 || maxColor.A != 255; - int variance = BC67Utils.SquaredDifference(minColor.GetColor32(), maxColor.GetColor32()); - int selectedMode; - int indexMode = 0; - - if (alphaNotOne) - { - bool constantAlpha = minColor.A == maxColor.A; - if (constantAlpha) - { - selectedMode = variance > MinColorVarianceForModeChange ? 7 : 6; - } - else - { - if (variance > MinColorVarianceForModeChange) - { - Span<uint> uniqueRGB = stackalloc uint[16]; - Span<uint> uniqueAlpha = stackalloc uint[16]; - - int uniqueRGBCount = 0; - int uniqueAlphaCount = 0; - - uint rgbMask = new RgbaColor8(255, 255, 255, 0).ToUInt32(); - uint alphaMask = new RgbaColor8(0, 0, 0, 255).ToUInt32(); - - for (int i = 0; i < tile.Length; i++) - { - uint c = tile[i]; - - if (!uniqueRGB.Slice(0, uniqueRGBCount).Contains(c & rgbMask)) - { - uniqueRGB[uniqueRGBCount++] = c & rgbMask; - } - - if (!uniqueAlpha.Slice(0, uniqueAlphaCount).Contains(c & alphaMask)) - { - uniqueAlpha[uniqueAlphaCount++] = c & alphaMask; - } - } - - selectedMode = 4; - indexMode = uniqueRGBCount > uniqueAlphaCount ? 1 : 0; - } - else - { - selectedMode = 5; - } - } - } - else - { - if (variance > MinColorVarianceForModeChange) - { - selectedMode = 1; - } - else - { - selectedMode = 6; - } - } - - int selectedPartition = 0; - - if (selectedMode == 1 || selectedMode == 7) - { - int partitionSelectionLowestError = int.MaxValue; - - for (int i = 0; i < _mostFrequentPartitions.Length; i++) - { - int p = _mostFrequentPartitions[i]; - int error = GetEndPointSelectionErrorFast(tile, 2, p, w, h, partitionSelectionLowestError); - if (error < partitionSelectionLowestError) - { - partitionSelectionLowestError = error; - selectedPartition = p; - } - } - } - - return Encode(selectedMode, selectedPartition, 0, indexMode, fastMode: true, tile, w, h, out _); - } - - private static Block EncodeExhaustive(ReadOnlySpan<uint> tile, int w, int h) - { - Block bestBlock = default; - int lowestError = int.MaxValue; - int lowestErrorSubsets = int.MaxValue; - - for (int m = 0; m < 8; m++) - { - for (int r = 0; r < (m == 4 || m == 5 ? 4 : 1); r++) - { - for (int im = 0; im < (m == 4 ? 2 : 1); im++) - { - for (int p = 0; p < 1 << BC67Tables.BC7ModeInfos[m].PartitionBitCount; p++) - { - Block block = Encode(m, p, r, im, fastMode: false, tile, w, h, out int maxError); - if (maxError < lowestError || (maxError == lowestError && BC67Tables.BC7ModeInfos[m].SubsetCount < lowestErrorSubsets)) - { - lowestError = maxError; - lowestErrorSubsets = BC67Tables.BC7ModeInfos[m].SubsetCount; - bestBlock = block; - } - } - } - } - } - - return bestBlock; - } - - private static Block Encode( - int mode, - int partition, - int rotation, - int indexMode, - bool fastMode, - ReadOnlySpan<uint> tile, - int w, - int h, - out int errorSum) - { - BC7ModeInfo modeInfo = BC67Tables.BC7ModeInfos[mode]; - int subsetCount = modeInfo.SubsetCount; - int partitionBitCount = modeInfo.PartitionBitCount; - int rotationBitCount = modeInfo.RotationBitCount; - int indexModeBitCount = modeInfo.IndexModeBitCount; - int colorDepth = modeInfo.ColorDepth; - int alphaDepth = modeInfo.AlphaDepth; - int pBits = modeInfo.PBits; - int colorIndexBitCount = modeInfo.ColorIndexBitCount; - int alphaIndexBitCount = modeInfo.AlphaIndexBitCount; - bool separateAlphaIndices = alphaIndexBitCount != 0; - - uint alphaMask; - - if (separateAlphaIndices) - { - alphaMask = rotation switch - { - 1 => new RgbaColor8(255, 0, 0, 0).ToUInt32(), - 2 => new RgbaColor8(0, 255, 0, 0).ToUInt32(), - 3 => new RgbaColor8(0, 0, 255, 0).ToUInt32(), - _ => new RgbaColor8(0, 0, 0, 255).ToUInt32() - }; - } - else - { - alphaMask = new RgbaColor8(0, 0, 0, 0).ToUInt32(); - } - - if (indexMode != 0) - { - alphaMask = ~alphaMask; - } - - // - // Select color palette. - // - - Span<uint> endPoints0 = stackalloc uint[subsetCount]; - Span<uint> endPoints1 = stackalloc uint[subsetCount]; - - SelectEndPoints( - tile, - w, - h, - endPoints0, - endPoints1, - subsetCount, - partition, - colorIndexBitCount, - colorDepth, - alphaDepth, - ~alphaMask, - fastMode); - - if (separateAlphaIndices) - { - SelectEndPoints( - tile, - w, - h, - endPoints0, - endPoints1, - subsetCount, - partition, - alphaIndexBitCount, - colorDepth, - alphaDepth, - alphaMask, - fastMode); - } - - Span<int> pBitValues = stackalloc int[pBits]; - - for (int i = 0; i < pBits; i++) - { - int pBit; - - if (pBits == subsetCount) - { - pBit = GetPBit(endPoints0[i], endPoints1[i], colorDepth, alphaDepth); - } - else - { - int subset = i >> 1; - uint color = (i & 1) == 0 ? endPoints0[subset] : endPoints1[subset]; - pBit = GetPBit(color, colorDepth, alphaDepth); - } - - pBitValues[i] = pBit; - } - - int colorIndexCount = 1 << colorIndexBitCount; - int alphaIndexCount = 1 << alphaIndexBitCount; - - Span<byte> colorIndices = stackalloc byte[16]; - Span<byte> alphaIndices = stackalloc byte[16]; - - errorSum = BC67Utils.SelectIndices( - tile, - w, - h, - endPoints0, - endPoints1, - pBitValues, - colorIndices, - subsetCount, - partition, - colorIndexBitCount, - colorIndexCount, - colorDepth, - alphaDepth, - pBits, - alphaMask); - - if (separateAlphaIndices) - { - errorSum += BC67Utils.SelectIndices( - tile, - w, - h, - endPoints0, - endPoints1, - pBitValues, - alphaIndices, - subsetCount, - partition, - alphaIndexBitCount, - alphaIndexCount, - colorDepth, - alphaDepth, - pBits, - ~alphaMask); - } - - Span<bool> colorSwapSubset = stackalloc bool[3]; - - for (int i = 0; i < 3; i++) - { - colorSwapSubset[i] = colorIndices[BC67Tables.FixUpIndices[subsetCount - 1][partition][i]] >= (colorIndexCount >> 1); - } - - bool alphaSwapSubset = alphaIndices[0] >= (alphaIndexCount >> 1); - - Block block = new Block(); - - int offset = 0; - - block.Encode(1UL << mode, ref offset, mode + 1); - block.Encode((ulong)partition, ref offset, partitionBitCount); - block.Encode((ulong)rotation, ref offset, rotationBitCount); - block.Encode((ulong)indexMode, ref offset, indexModeBitCount); - - for (int comp = 0; comp < 3; comp++) - { - int rotatedComp = comp; - - if (((comp + 1) & 3) == rotation) - { - rotatedComp = 3; - } - - for (int subset = 0; subset < subsetCount; subset++) - { - RgbaColor8 color0 = RgbaColor8.FromUInt32(endPoints0[subset]); - RgbaColor8 color1 = RgbaColor8.FromUInt32(endPoints1[subset]); - - int pBit0 = -1, pBit1 = -1; - - if (pBits == subsetCount) - { - pBit0 = pBit1 = pBitValues[subset]; - } - else if (pBits != 0) - { - pBit0 = pBitValues[subset * 2]; - pBit1 = pBitValues[subset * 2 + 1]; - } - - if (indexMode == 0 ? colorSwapSubset[subset] : alphaSwapSubset) - { - block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), colorDepth, pBit1), ref offset, colorDepth); - block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), colorDepth, pBit0), ref offset, colorDepth); - } - else - { - block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), colorDepth, pBit0), ref offset, colorDepth); - block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), colorDepth, pBit1), ref offset, colorDepth); - } - } - } - - if (alphaDepth != 0) - { - int rotatedComp = (rotation - 1) & 3; - - for (int subset = 0; subset < subsetCount; subset++) - { - RgbaColor8 color0 = RgbaColor8.FromUInt32(endPoints0[subset]); - RgbaColor8 color1 = RgbaColor8.FromUInt32(endPoints1[subset]); - - int pBit0 = -1, pBit1 = -1; - - if (pBits == subsetCount) - { - pBit0 = pBit1 = pBitValues[subset]; - } - else if (pBits != 0) - { - pBit0 = pBitValues[subset * 2]; - pBit1 = pBitValues[subset * 2 + 1]; - } - - if (separateAlphaIndices && indexMode == 0 ? alphaSwapSubset : colorSwapSubset[subset]) - { - block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), alphaDepth, pBit1), ref offset, alphaDepth); - block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), alphaDepth, pBit0), ref offset, alphaDepth); - } - else - { - block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), alphaDepth, pBit0), ref offset, alphaDepth); - block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), alphaDepth, pBit1), ref offset, alphaDepth); - } - } - } - - for (int i = 0; i < pBits; i++) - { - block.Encode((ulong)pBitValues[i], ref offset, 1); - } - - byte[] fixUpTable = BC67Tables.FixUpIndices[subsetCount - 1][partition]; - - for (int i = 0; i < 16; i++) - { - int subset = BC67Tables.PartitionTable[subsetCount - 1][partition][i]; - byte index = colorIndices[i]; - - if (colorSwapSubset[subset]) - { - index = (byte)(index ^ (colorIndexCount - 1)); - } - - int finalIndexBitCount = i == fixUpTable[subset] ? colorIndexBitCount - 1 : colorIndexBitCount; - - Debug.Assert(index < (1 << finalIndexBitCount)); - - block.Encode(index, ref offset, finalIndexBitCount); - } - - if (separateAlphaIndices) - { - for (int i = 0; i < 16; i++) - { - byte index = alphaIndices[i]; - - if (alphaSwapSubset) - { - index = (byte)(index ^ (alphaIndexCount - 1)); - } - - int finalIndexBitCount = i == 0 ? alphaIndexBitCount - 1 : alphaIndexBitCount; - - Debug.Assert(index < (1 << finalIndexBitCount)); - - block.Encode(index, ref offset, finalIndexBitCount); - } - } - - return block; - } - - private static unsafe int GetEndPointSelectionErrorFast(ReadOnlySpan<uint> tile, int subsetCount, int partition, int w, int h, int maxError) - { - byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; - - Span<RgbaColor8> minColors = stackalloc RgbaColor8[subsetCount]; - Span<RgbaColor8> maxColors = stackalloc RgbaColor8[subsetCount]; - - BC67Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); - - Span<uint> endPoints0 = stackalloc uint[subsetCount]; - Span<uint> endPoints1 = stackalloc uint[subsetCount]; - - SelectEndPointsFast(partitionTable, tile, w, h, subsetCount, minColors, maxColors, endPoints0, endPoints1, uint.MaxValue); - - Span<RgbaColor32> palette = stackalloc RgbaColor32[8]; - - int errorSum = 0; - - for (int subset = 0; subset < subsetCount; subset++) - { - RgbaColor32 blockDir = maxColors[subset].GetColor32() - minColors[subset].GetColor32(); - int sum = blockDir.R + blockDir.G + blockDir.B + blockDir.A; - if (sum != 0) - { - blockDir = (blockDir << 6) / new RgbaColor32(sum); - } - - uint c0 = endPoints0[subset]; - uint c1 = endPoints1[subset]; - - int pBit0 = GetPBit(c0, 6, 0); - int pBit1 = GetPBit(c1, 6, 0); - - c0 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c0), 6, 0, pBit0).ToUInt32(); - c1 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c1), 6, 0, pBit1).ToUInt32(); - - if (Sse41.IsSupported) - { - Vector128<byte> c0Rep = Vector128.Create(c0).AsByte(); - Vector128<byte> c1Rep = Vector128.Create(c1).AsByte(); - - Vector128<byte> c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); - - Vector128<byte> rWeights; - Vector128<byte> lWeights; - - fixed (byte* pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1]) - { - rWeights = Sse2.LoadScalarVector128((ulong*)pWeights).AsByte(); - lWeights = Sse2.LoadScalarVector128((ulong*)pInvWeights).AsByte(); - } - - Vector128<byte> iWeights = Sse2.UnpackLow(rWeights, lWeights); - Vector128<byte> iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); - Vector128<byte> iWeights23 = Sse2.UnpackHigh(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); - Vector128<byte> iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); - Vector128<byte> iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); - Vector128<byte> iWeights2 = Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); - Vector128<byte> iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); - - static Vector128<short> ShiftRoundToNearest(Vector128<short> x) - { - return Sse2.ShiftRightLogical(Sse2.Add(x, Vector128.Create((short)32)), 6); - } - - Vector128<short> pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); - Vector128<short> pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); - Vector128<short> pal2 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights2.AsSByte())); - Vector128<short> pal3 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights3.AsSByte())); - - for (int i = 0; i < tile.Length; i++) - { - if (partitionTable[i] != subset) - { - continue; - } - - uint c = tile[i]; - - Vector128<short> color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); - - Vector128<short> delta0 = Sse2.Subtract(color, pal0); - Vector128<short> delta1 = Sse2.Subtract(color, pal1); - Vector128<short> delta2 = Sse2.Subtract(color, pal2); - Vector128<short> delta3 = Sse2.Subtract(color, pal3); - - Vector128<int> deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); - Vector128<int> deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); - Vector128<int> deltaSum2 = Sse2.MultiplyAddAdjacent(delta2, delta2); - Vector128<int> deltaSum3 = Sse2.MultiplyAddAdjacent(delta3, delta3); - - Vector128<int> deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); - Vector128<int> deltaSum23 = Ssse3.HorizontalAdd(deltaSum2, deltaSum3); - - Vector128<ushort> delta = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum23); - - Vector128<ushort> min = Sse41.MinHorizontal(delta); - - errorSum += min.GetElement(0); - } - } - else - { - RgbaColor32 e032 = RgbaColor8.FromUInt32(c0).GetColor32(); - RgbaColor32 e132 = RgbaColor8.FromUInt32(c1).GetColor32(); - - palette[0] = e032; - palette[palette.Length - 1] = e132; - - for (int i = 1; i < palette.Length - 1; i++) - { - palette[i] = BC67Utils.Interpolate(e032, e132, i, 3); - } - - for (int i = 0; i < tile.Length; i++) - { - if (partitionTable[i] != subset) - { - continue; - } - - uint c = tile[i]; - RgbaColor32 color = Unsafe.As<uint, RgbaColor8>(ref c).GetColor32(); - - int bestMatchScore = int.MaxValue; - - for (int j = 0; j < palette.Length; j++) - { - int score = BC67Utils.SquaredDifference(color, palette[j]); - - if (score < bestMatchScore) - { - bestMatchScore = score; - } - } - - errorSum += bestMatchScore; - } - } - - // No point in continuing if we are already above maximum. - if (errorSum >= maxError) - { - return int.MaxValue; - } - } - - return errorSum; - } - - private static void SelectEndPoints( - ReadOnlySpan<uint> tile, - int w, - int h, - Span<uint> endPoints0, - Span<uint> endPoints1, - int subsetCount, - int partition, - int indexBitCount, - int colorDepth, - int alphaDepth, - uint writeMask, - bool fastMode) - { - byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; - - Span<RgbaColor8> minColors = stackalloc RgbaColor8[subsetCount]; - Span<RgbaColor8> maxColors = stackalloc RgbaColor8[subsetCount]; - - BC67Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); - - uint inverseMask = ~writeMask; - - for (int i = 0; i < subsetCount; i++) - { - Unsafe.As<RgbaColor8, uint>(ref minColors[i]) |= inverseMask; - Unsafe.As<RgbaColor8, uint>(ref maxColors[i]) |= inverseMask; - } - - if (fastMode) - { - SelectEndPointsFast(partitionTable, tile, w, h, subsetCount, minColors, maxColors, endPoints0, endPoints1, writeMask); - } - else - { - Span<RgbaColor8> colors = stackalloc RgbaColor8[subsetCount * 16]; - Span<byte> counts = stackalloc byte[subsetCount]; - - int i = 0; - for (int ty = 0; ty < h; ty++) - { - for (int tx = 0; tx < w; tx++) - { - int subset = partitionTable[ty * 4 + tx]; - RgbaColor8 color = RgbaColor8.FromUInt32(tile[i++] | inverseMask); - - static void AddIfNew(Span<RgbaColor8> values, RgbaColor8 value, int subset, ref byte count) - { - for (int i = 0; i < count; i++) - { - if (values[subset * 16 + i] == value) - { - return; - } - } - - values[subset * 16 + count++] = value; - } - - AddIfNew(colors, color, subset, ref counts[subset]); - } - } - - for (int subset = 0; subset < subsetCount; subset++) - { - int offset = subset * 16; - - RgbaColor8 minColor = minColors[subset]; - RgbaColor8 maxColor = maxColors[subset]; - - ReadOnlySpan<RgbaColor8> subsetColors = colors.Slice(offset, counts[subset]); - - (RgbaColor8 e0, RgbaColor8 e1) = SelectEndPoints(subsetColors, minColor, maxColor, indexBitCount, colorDepth, alphaDepth, inverseMask); - - endPoints0[subset] = (endPoints0[subset] & inverseMask) | (e0.ToUInt32() & writeMask); - endPoints1[subset] = (endPoints1[subset] & inverseMask) | (e1.ToUInt32() & writeMask); - } - } - } - - private static unsafe void SelectEndPointsFast( - ReadOnlySpan<byte> partitionTable, - ReadOnlySpan<uint> tile, - int w, - int h, - int subsetCount, - ReadOnlySpan<RgbaColor8> minColors, - ReadOnlySpan<RgbaColor8> maxColors, - Span<uint> endPoints0, - Span<uint> endPoints1, - uint writeMask) - { - uint inverseMask = ~writeMask; - - if (Sse41.IsSupported && w == 4 && h == 4) - { - Vector128<byte> row0, row1, row2, row3; - Vector128<short> ones = Vector128<short>.AllBitsSet; - - fixed (uint* pTile = tile) - { - row0 = Sse2.LoadVector128(pTile).AsByte(); - row1 = Sse2.LoadVector128(pTile + 4).AsByte(); - row2 = Sse2.LoadVector128(pTile + 8).AsByte(); - row3 = Sse2.LoadVector128(pTile + 12).AsByte(); - } - - Vector128<byte> partitionMask; - - fixed (byte* pPartitionTable = partitionTable) - { - partitionMask = Sse2.LoadVector128(pPartitionTable); - } - - for (int subset = 0; subset < subsetCount; subset++) - { - RgbaColor32 blockDir = maxColors[subset].GetColor32() - minColors[subset].GetColor32(); - int sum = blockDir.R + blockDir.G + blockDir.B + blockDir.A; - if (sum != 0) - { - blockDir = (blockDir << 6) / new RgbaColor32(sum); - } - - Vector128<byte> bd = Vector128.Create(blockDir.GetColor8().ToUInt32()).AsByte(); - - Vector128<short> delta0 = Ssse3.MultiplyAddAdjacent(row0, bd.AsSByte()); - Vector128<short> delta1 = Ssse3.MultiplyAddAdjacent(row1, bd.AsSByte()); - Vector128<short> delta2 = Ssse3.MultiplyAddAdjacent(row2, bd.AsSByte()); - Vector128<short> delta3 = Ssse3.MultiplyAddAdjacent(row3, bd.AsSByte()); - - Vector128<short> delta01 = Ssse3.HorizontalAdd(delta0, delta1); - Vector128<short> delta23 = Ssse3.HorizontalAdd(delta2, delta3); - - Vector128<byte> subsetMask = Sse2.Xor(Sse2.CompareEqual(partitionMask, Vector128.Create((byte)subset)), ones.AsByte()); - - Vector128<short> subsetMask01 = Sse2.UnpackLow(subsetMask, subsetMask).AsInt16(); - Vector128<short> subsetMask23 = Sse2.UnpackHigh(subsetMask, subsetMask).AsInt16(); - - Vector128<ushort> min01 = Sse41.MinHorizontal(Sse2.Or(delta01, subsetMask01).AsUInt16()); - Vector128<ushort> min23 = Sse41.MinHorizontal(Sse2.Or(delta23, subsetMask23).AsUInt16()); - Vector128<ushort> max01 = Sse41.MinHorizontal(Sse2.Xor(Sse2.AndNot(subsetMask01, delta01), ones).AsUInt16()); - Vector128<ushort> max23 = Sse41.MinHorizontal(Sse2.Xor(Sse2.AndNot(subsetMask23, delta23), ones).AsUInt16()); - - uint minPos01 = min01.AsUInt32().GetElement(0); - uint minPos23 = min23.AsUInt32().GetElement(0); - uint maxPos01 = max01.AsUInt32().GetElement(0); - uint maxPos23 = max23.AsUInt32().GetElement(0); - - uint minDistColor = (ushort)minPos23 < (ushort)minPos01 - ? tile[(int)(minPos23 >> 16) + 8] - : tile[(int)(minPos01 >> 16)]; - - // Note that we calculate the maximum as the minimum of the inverse, so less here is actually greater. - uint maxDistColor = (ushort)maxPos23 < (ushort)maxPos01 - ? tile[(int)(maxPos23 >> 16) + 8] - : tile[(int)(maxPos01 >> 16)]; - - endPoints0[subset] = (endPoints0[subset] & inverseMask) | (minDistColor & writeMask); - endPoints1[subset] = (endPoints1[subset] & inverseMask) | (maxDistColor & writeMask); - } - } - else - { - for (int subset = 0; subset < subsetCount; subset++) - { - RgbaColor32 blockDir = maxColors[subset].GetColor32() - minColors[subset].GetColor32(); - blockDir = RgbaColor32.DivideGuarded(blockDir << 6, new RgbaColor32(blockDir.R + blockDir.G + blockDir.B + blockDir.A), 0); - - int minDist = int.MaxValue; - int maxDist = int.MinValue; - - RgbaColor8 minDistColor = default; - RgbaColor8 maxDistColor = default; - - int i = 0; - for (int ty = 0; ty < h; ty++) - { - for (int tx = 0; tx < w; tx++, i++) - { - if (partitionTable[ty * 4 + tx] != subset) - { - continue; - } - - RgbaColor8 color = RgbaColor8.FromUInt32(tile[i]); - int dist = RgbaColor32.Dot(color.GetColor32(), blockDir); - - if (minDist > dist) - { - minDist = dist; - minDistColor = color; - } - - if (maxDist < dist) - { - maxDist = dist; - maxDistColor = color; - } - } - } - - endPoints0[subset] = (endPoints0[subset] & inverseMask) | (minDistColor.ToUInt32() & writeMask); - endPoints1[subset] = (endPoints1[subset] & inverseMask) | (maxDistColor.ToUInt32() & writeMask); - } - } - } - - private static (RgbaColor8, RgbaColor8) SelectEndPoints( - ReadOnlySpan<RgbaColor8> values, - RgbaColor8 minValue, - RgbaColor8 maxValue, - int indexBitCount, - int colorDepth, - int alphaDepth, - uint alphaMask) - { - int n = values.Length; - int numInterpolatedColors = 1 << indexBitCount; - int numInterpolatedColorsMinus1 = numInterpolatedColors - 1; - - if (n == 0) - { - return (default, default); - } - - minValue = BC67Utils.Quantize(minValue, colorDepth, alphaDepth); - maxValue = BC67Utils.Quantize(maxValue, colorDepth, alphaDepth); - - RgbaColor32 blockDir = maxValue.GetColor32() - minValue.GetColor32(); - blockDir = RgbaColor32.DivideGuarded(blockDir << 6, new RgbaColor32(blockDir.R + blockDir.G + blockDir.B + blockDir.A), 0); - - int minDist = int.MaxValue; - int maxDist = 0; - - for (int i = 0; i < values.Length; i++) - { - RgbaColor8 color = values[i]; - int dist = RgbaColor32.Dot(BC67Utils.Quantize(color, colorDepth, alphaDepth).GetColor32(), blockDir); - - if (minDist >= dist) - { - minDist = dist; - } - - if (maxDist <= dist) - { - maxDist = dist; - } - } - - Span<RgbaColor8> palette = stackalloc RgbaColor8[numInterpolatedColors]; - - int distRange = Math.Max(1, maxDist - minDist); - - RgbaColor32 nV = new RgbaColor32(n); - - int bestErrorSum = int.MaxValue; - RgbaColor8 bestE0 = default; - RgbaColor8 bestE1 = default; - - Span<int> indices = stackalloc int[n]; - Span<RgbaColor32> colors = stackalloc RgbaColor32[n]; - - for (int maxIndex = numInterpolatedColorsMinus1; maxIndex >= 1; maxIndex--) - { - int sumX = 0; - int sumXX = 0; - int sumXXIncrement = 0; - - for (int i = 0; i < values.Length; i++) - { - RgbaColor32 color = values[i].GetColor32(); - - int dist = RgbaColor32.Dot(color, blockDir); - - int normalizedValue = ((dist - minDist) << 6) / distRange; - int texelIndex = (normalizedValue * maxIndex + 32) >> 6; - - indices[i] = texelIndex; - colors[i] = color; - - sumX += texelIndex; - sumXX += texelIndex * texelIndex; - sumXXIncrement += 1 + texelIndex * 2; - } - - for (int start = 0; start < numInterpolatedColors - maxIndex; start++) - { - RgbaColor32 sumY = new RgbaColor32(0); - RgbaColor32 sumXY = new RgbaColor32(0); - - for (int i = 0; i < indices.Length; i++) - { - RgbaColor32 y = colors[i]; - - sumY += y; - sumXY += new RgbaColor32(start + indices[i]) * y; - } - - RgbaColor32 sumXV = new RgbaColor32(sumX); - RgbaColor32 sumXXV = new RgbaColor32(sumXX); - RgbaColor32 m = RgbaColor32.DivideGuarded((nV * sumXY - sumXV * sumY) << 6, nV * sumXXV - sumXV * sumXV, 0); - RgbaColor32 b = ((sumY << 6) - m * sumXV) / nV; - - RgbaColor8 candidateE0 = (b >> 6).GetColor8(); - RgbaColor8 candidateE1 = ((b + m * new RgbaColor32(numInterpolatedColorsMinus1)) >> 6).GetColor8(); - - int pBit0 = GetPBit(candidateE0.ToUInt32(), colorDepth, alphaDepth); - int pBit1 = GetPBit(candidateE1.ToUInt32(), colorDepth, alphaDepth); - - int errorSum = BC67Utils.SelectIndices( - MemoryMarshal.Cast<RgbaColor8, uint>(values), - candidateE0.ToUInt32(), - candidateE1.ToUInt32(), - pBit0, - pBit1, - indexBitCount, - numInterpolatedColors, - colorDepth, - alphaDepth, - alphaMask); - - if (errorSum <= bestErrorSum) - { - bestErrorSum = errorSum; - bestE0 = candidateE0; - bestE1 = candidateE1; - } - - sumX += n; - sumXX += sumXXIncrement; - sumXXIncrement += 2 * n; - } - } - - return (bestE0, bestE1); - } - - private static int GetPBit(uint color, int colorDepth, int alphaDepth) - { - uint mask = 0x808080u >> colorDepth; - - if (alphaDepth != 0) - { - // If alpha is 0, let's assume the color information is not too important and prefer - // to preserve alpha instead. - if ((color >> 24) == 0) - { - return 0; - } - - mask |= 0x80000000u >> alphaDepth; - } - - color &= 0x7f7f7f7fu; - color += mask >> 1; - - int onesCount = BitOperations.PopCount(color & mask); - return onesCount >= 2 ? 1 : 0; - } - - private static int GetPBit(uint c0, uint c1, int colorDepth, int alphaDepth) - { - // Giving preference to the first endpoint yields better results, - // might be a side effect of the endpoint selection algorithm? - return GetPBit(c0, colorDepth, alphaDepth); - } - } -} diff --git a/Ryujinx.Graphics.Texture/Encoders/EncodeMode.cs b/Ryujinx.Graphics.Texture/Encoders/EncodeMode.cs deleted file mode 100644 index 5734d301..00000000 --- a/Ryujinx.Graphics.Texture/Encoders/EncodeMode.cs +++ /dev/null @@ -1,10 +0,0 @@ -namespace Ryujinx.Graphics.Texture.Encoders -{ - enum EncodeMode - { - Fast, - Exhaustive, - ModeMask = 0xff, - Multithreaded = 1 << 8 - } -} |
