diff options
| author | TSR Berry <20988865+TSRBerry@users.noreply.github.com> | 2023-04-08 01:22:00 +0200 |
|---|---|---|
| committer | Mary <thog@protonmail.com> | 2023-04-27 23:51:14 +0200 |
| commit | cee712105850ac3385cd0091a923438167433f9f (patch) | |
| tree | 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /Ryujinx.Graphics.Texture | |
| parent | cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff) | |
Move solution and projects to src
Diffstat (limited to 'Ryujinx.Graphics.Texture')
32 files changed, 0 insertions, 9565 deletions
diff --git a/Ryujinx.Graphics.Texture/Astc/AstcDecoder.cs b/Ryujinx.Graphics.Texture/Astc/AstcDecoder.cs deleted file mode 100644 index 08738583..00000000 --- a/Ryujinx.Graphics.Texture/Astc/AstcDecoder.cs +++ /dev/null @@ -1,1621 +0,0 @@ -using Ryujinx.Common.Utilities; -using System; -using System.Diagnostics; -using System.Linq; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -namespace Ryujinx.Graphics.Texture.Astc -{ - // https://github.com/GammaUNC/FasTC/blob/master/ASTCEncoder/src/Decompressor.cpp - public class AstcDecoder - { - private ReadOnlyMemory<byte> InputBuffer { get; } - private Memory<byte> OutputBuffer { get; } - - private int BlockSizeX { get; } - private int BlockSizeY { get; } - - private AstcLevel[] Levels { get; } - - private bool Success { get; set; } - - public int TotalBlockCount { get; } - - public AstcDecoder( - ReadOnlyMemory<byte> inputBuffer, - Memory<byte> outputBuffer, - int blockWidth, - int blockHeight, - int width, - int height, - int depth, - int levels, - int layers) - { - if ((uint)blockWidth > 12) - { - throw new ArgumentOutOfRangeException(nameof(blockWidth)); - } - - if ((uint)blockHeight > 12) - { - throw new ArgumentOutOfRangeException(nameof(blockHeight)); - } - - InputBuffer = inputBuffer; - OutputBuffer = outputBuffer; - - BlockSizeX = blockWidth; - BlockSizeY = blockHeight; - - Levels = new AstcLevel[levels * layers]; - - Success = true; - - TotalBlockCount = 0; - - int currentInputBlock = 0; - int currentOutputOffset = 0; - - for (int i = 0; i < levels; i++) - { - for (int j = 0; j < layers; j++) - { - ref AstcLevel level = ref Levels[i * layers + j]; - - level.ImageSizeX = Math.Max(1, width >> i); - level.ImageSizeY = Math.Max(1, height >> i); - level.ImageSizeZ = Math.Max(1, depth >> i); - - level.BlockCountX = (level.ImageSizeX + blockWidth - 1) / blockWidth; - level.BlockCountY = (level.ImageSizeY + blockHeight - 1) / blockHeight; - - level.StartBlock = currentInputBlock; - 
level.OutputByteOffset = currentOutputOffset; - - currentInputBlock += level.TotalBlockCount; - currentOutputOffset += level.PixelCount * 4; - } - } - - TotalBlockCount = currentInputBlock; - } - - private struct AstcLevel - { - public int ImageSizeX { get; set; } - public int ImageSizeY { get; set; } - public int ImageSizeZ { get; set; } - - public int BlockCountX { get; set; } - public int BlockCountY { get; set; } - - public int StartBlock { get; set; } - public int OutputByteOffset { get; set; } - - public int TotalBlockCount => BlockCountX * BlockCountY * ImageSizeZ; - public int PixelCount => ImageSizeX * ImageSizeY * ImageSizeZ; - } - - public static int QueryDecompressedSize(int sizeX, int sizeY, int sizeZ, int levelCount, int layerCount) - { - int size = 0; - - for (int i = 0; i < levelCount; i++) - { - int levelSizeX = Math.Max(1, sizeX >> i); - int levelSizeY = Math.Max(1, sizeY >> i); - int levelSizeZ = Math.Max(1, sizeZ >> i); - - size += levelSizeX * levelSizeY * levelSizeZ * layerCount; - } - - return size * 4; - } - - public void ProcessBlock(int index) - { - Buffer16 inputBlock = MemoryMarshal.Cast<byte, Buffer16>(InputBuffer.Span)[index]; - - Span<int> decompressedData = stackalloc int[144]; - - try - { - DecompressBlock(inputBlock, decompressedData, BlockSizeX, BlockSizeY); - } - catch (Exception) - { - Success = false; - } - - Span<byte> decompressedBytes = MemoryMarshal.Cast<int, byte>(decompressedData); - - AstcLevel levelInfo = GetLevelInfo(index); - - WriteDecompressedBlock(decompressedBytes, OutputBuffer.Span.Slice(levelInfo.OutputByteOffset), - index - levelInfo.StartBlock, levelInfo); - } - - private AstcLevel GetLevelInfo(int blockIndex) - { - foreach (AstcLevel levelInfo in Levels) - { - if (blockIndex < levelInfo.StartBlock + levelInfo.TotalBlockCount) - { - return levelInfo; - } - } - - throw new AstcDecoderException("Invalid block index."); - } - - private void WriteDecompressedBlock(ReadOnlySpan<byte> block, Span<byte> outputBuffer, 
int blockIndex, AstcLevel level) - { - int stride = level.ImageSizeX * 4; - - int blockCordX = blockIndex % level.BlockCountX; - int blockCordY = blockIndex / level.BlockCountX; - - int pixelCordX = blockCordX * BlockSizeX; - int pixelCordY = blockCordY * BlockSizeY; - - int outputPixelsX = Math.Min(pixelCordX + BlockSizeX, level.ImageSizeX) - pixelCordX; - int outputPixelsY = Math.Min(pixelCordY + BlockSizeY, level.ImageSizeY * level.ImageSizeZ) - pixelCordY; - - int outputStart = pixelCordX * 4 + pixelCordY * stride; - int outputOffset = outputStart; - - int inputOffset = 0; - - for (int i = 0; i < outputPixelsY; i++) - { - ReadOnlySpan<byte> blockRow = block.Slice(inputOffset, outputPixelsX * 4); - Span<byte> outputRow = outputBuffer.Slice(outputOffset); - blockRow.CopyTo(outputRow); - - inputOffset += BlockSizeX * 4; - outputOffset += stride; - } - } - - struct TexelWeightParams - { - public int Width; - public int Height; - public int MaxWeight; - public bool DualPlane; - public bool Error; - public bool VoidExtentLdr; - public bool VoidExtentHdr; - - public int GetPackedBitSize() - { - // How many indices do we have? 
- int indices = Height * Width; - - if (DualPlane) - { - indices *= 2; - } - - IntegerEncoded intEncoded = IntegerEncoded.CreateEncoding(MaxWeight); - - return intEncoded.GetBitLength(indices); - } - - public int GetNumWeightValues() - { - int ret = Width * Height; - - if (DualPlane) - { - ret *= 2; - } - - return ret; - } - } - - public static bool TryDecodeToRgba8( - ReadOnlyMemory<byte> data, - int blockWidth, - int blockHeight, - int width, - int height, - int depth, - int levels, - int layers, - out Span<byte> decoded) - { - byte[] output = new byte[QueryDecompressedSize(width, height, depth, levels, layers)]; - - AstcDecoder decoder = new AstcDecoder(data, output, blockWidth, blockHeight, width, height, depth, levels, layers); - - for (int i = 0; i < decoder.TotalBlockCount; i++) - { - decoder.ProcessBlock(i); - } - - decoded = output; - - return decoder.Success; - } - - public static bool TryDecodeToRgba8( - ReadOnlyMemory<byte> data, - Memory<byte> outputBuffer, - int blockWidth, - int blockHeight, - int width, - int height, - int depth, - int levels, - int layers) - { - AstcDecoder decoder = new AstcDecoder(data, outputBuffer, blockWidth, blockHeight, width, height, depth, levels, layers); - - for (int i = 0; i < decoder.TotalBlockCount; i++) - { - decoder.ProcessBlock(i); - } - - return decoder.Success; - } - - public static bool TryDecodeToRgba8P( - ReadOnlyMemory<byte> data, - Memory<byte> outputBuffer, - int blockWidth, - int blockHeight, - int width, - int height, - int depth, - int levels, - int layers) - { - AstcDecoder decoder = new AstcDecoder(data, outputBuffer, blockWidth, blockHeight, width, height, depth, levels, layers); - - // Lazy parallelism - Enumerable.Range(0, decoder.TotalBlockCount).AsParallel().ForAll(x => decoder.ProcessBlock(x)); - - return decoder.Success; - } - - public static bool TryDecodeToRgba8P( - ReadOnlyMemory<byte> data, - int blockWidth, - int blockHeight, - int width, - int height, - int depth, - int levels, - int 
layers, - out byte[] decoded) - { - byte[] output = new byte[QueryDecompressedSize(width, height, depth, levels, layers)]; - - AstcDecoder decoder = new AstcDecoder(data, output, blockWidth, blockHeight, width, height, depth, levels, layers); - - Enumerable.Range(0, decoder.TotalBlockCount).AsParallel().ForAll(x => decoder.ProcessBlock(x)); - - decoded = output; - - return decoder.Success; - } - - public static bool DecompressBlock( - Buffer16 inputBlock, - Span<int> outputBuffer, - int blockWidth, - int blockHeight) - { - BitStream128 bitStream = new BitStream128(inputBlock); - - DecodeBlockInfo(ref bitStream, out TexelWeightParams texelParams); - - if (texelParams.Error) - { - throw new AstcDecoderException("Invalid block mode"); - } - - if (texelParams.VoidExtentLdr) - { - FillVoidExtentLdr(ref bitStream, outputBuffer, blockWidth, blockHeight); - - return true; - } - - if (texelParams.VoidExtentHdr) - { - throw new AstcDecoderException("HDR void extent blocks are not supported."); - } - - if (texelParams.Width > blockWidth) - { - throw new AstcDecoderException("Texel weight grid width should be smaller than block width."); - } - - if (texelParams.Height > blockHeight) - { - throw new AstcDecoderException("Texel weight grid height should be smaller than block height."); - } - - // Read num partitions - int numberPartitions = bitStream.ReadBits(2) + 1; - Debug.Assert(numberPartitions <= 4); - - if (numberPartitions == 4 && texelParams.DualPlane) - { - throw new AstcDecoderException("Dual plane mode is incompatible with four partition blocks."); - } - - // Based on the number of partitions, read the color endpoint mode for - // each partition. - - // Determine partitions, partition index, and color endpoint modes - int planeIndices; - int partitionIndex; - - Span<uint> colorEndpointMode = stackalloc uint[4]; - - BitStream128 colorEndpointStream = new BitStream128(); - - // Read extra config data... 
- uint baseColorEndpointMode = 0; - - if (numberPartitions == 1) - { - colorEndpointMode[0] = (uint)bitStream.ReadBits(4); - partitionIndex = 0; - } - else - { - partitionIndex = bitStream.ReadBits(10); - baseColorEndpointMode = (uint)bitStream.ReadBits(6); - } - - uint baseMode = (baseColorEndpointMode & 3); - - // Remaining bits are color endpoint data... - int numberWeightBits = texelParams.GetPackedBitSize(); - int remainingBits = bitStream.BitsLeft - numberWeightBits; - - // Consider extra bits prior to texel data... - uint extraColorEndpointModeBits = 0; - - if (baseMode != 0) - { - switch (numberPartitions) - { - case 2: extraColorEndpointModeBits += 2; break; - case 3: extraColorEndpointModeBits += 5; break; - case 4: extraColorEndpointModeBits += 8; break; - default: Debug.Assert(false); break; - } - } - - remainingBits -= (int)extraColorEndpointModeBits; - - // Do we have a dual plane situation? - int planeSelectorBits = 0; - - if (texelParams.DualPlane) - { - planeSelectorBits = 2; - } - - remainingBits -= planeSelectorBits; - - // Read color data... 
- int colorDataBits = remainingBits; - - while (remainingBits > 0) - { - int numberBits = Math.Min(remainingBits, 8); - int bits = bitStream.ReadBits(numberBits); - colorEndpointStream.WriteBits(bits, numberBits); - remainingBits -= 8; - } - - // Read the plane selection bits - planeIndices = bitStream.ReadBits(planeSelectorBits); - - // Read the rest of the CEM - if (baseMode != 0) - { - uint extraColorEndpointMode = (uint)bitStream.ReadBits((int)extraColorEndpointModeBits); - uint tempColorEndpointMode = (extraColorEndpointMode << 6) | baseColorEndpointMode; - tempColorEndpointMode >>= 2; - - Span<bool> c = stackalloc bool[4]; - - for (int i = 0; i < numberPartitions; i++) - { - c[i] = (tempColorEndpointMode & 1) != 0; - tempColorEndpointMode >>= 1; - } - - Span<byte> m = stackalloc byte[4]; - - for (int i = 0; i < numberPartitions; i++) - { - m[i] = (byte)(tempColorEndpointMode & 3); - tempColorEndpointMode >>= 2; - Debug.Assert(m[i] <= 3); - } - - for (int i = 0; i < numberPartitions; i++) - { - colorEndpointMode[i] = baseMode; - if (!(c[i])) colorEndpointMode[i] -= 1; - colorEndpointMode[i] <<= 2; - colorEndpointMode[i] |= m[i]; - } - } - else if (numberPartitions > 1) - { - uint tempColorEndpointMode = baseColorEndpointMode >> 2; - - for (int i = 0; i < numberPartitions; i++) - { - colorEndpointMode[i] = tempColorEndpointMode; - } - } - - // Make sure everything up till here is sane. 
- for (int i = 0; i < numberPartitions; i++) - { - Debug.Assert(colorEndpointMode[i] < 16); - } - Debug.Assert(bitStream.BitsLeft == texelParams.GetPackedBitSize()); - - // Decode both color data and texel weight data - Span<int> colorValues = stackalloc int[32]; // Four values * two endpoints * four maximum partitions - DecodeColorValues(colorValues, ref colorEndpointStream, colorEndpointMode, numberPartitions, colorDataBits); - - EndPointSet endPoints; - unsafe { _ = &endPoints; } // Skip struct initialization - - int colorValuesPosition = 0; - - for (int i = 0; i < numberPartitions; i++) - { - ComputeEndpoints(endPoints.Get(i), colorValues, colorEndpointMode[i], ref colorValuesPosition); - } - - // Read the texel weight data. - Buffer16 texelWeightData = inputBlock; - - // Reverse everything - for (int i = 0; i < 8; i++) - { - byte a = ReverseByte(texelWeightData[i]); - byte b = ReverseByte(texelWeightData[15 - i]); - - texelWeightData[i] = b; - texelWeightData[15 - i] = a; - } - - // Make sure that higher non-texel bits are set to zero - int clearByteStart = (texelParams.GetPackedBitSize() >> 3) + 1; - texelWeightData[clearByteStart - 1] &= (byte)((1 << (texelParams.GetPackedBitSize() % 8)) - 1); - - int cLen = 16 - clearByteStart; - for (int i = clearByteStart; i < clearByteStart + cLen; i++) texelWeightData[i] = 0; - - IntegerSequence texelWeightValues; - unsafe { _ = &texelWeightValues; } // Skip struct initialization - texelWeightValues.Reset(); - - BitStream128 weightBitStream = new BitStream128(texelWeightData); - - IntegerEncoded.DecodeIntegerSequence(ref texelWeightValues, ref weightBitStream, texelParams.MaxWeight, texelParams.GetNumWeightValues()); - - // Blocks can be at most 12x12, so we can have as many as 144 weights - Weights weights; - unsafe { _ = &weights; } // Skip struct initialization - - UnquantizeTexelWeights(ref weights, ref texelWeightValues, ref texelParams, blockWidth, blockHeight); - - ushort[] table = Bits.Replicate8_16Table; - - // 
Now that we have endpoints and weights, we can interpolate and generate - // the proper decoding... - for (int j = 0; j < blockHeight; j++) - { - for (int i = 0; i < blockWidth; i++) - { - int partition = Select2dPartition(partitionIndex, i, j, numberPartitions, ((blockHeight * blockWidth) < 32)); - Debug.Assert(partition < numberPartitions); - - AstcPixel pixel = new AstcPixel(); - for (int component = 0; component < 4; component++) - { - int component0 = endPoints.Get(partition)[0].GetComponent(component); - component0 = table[component0]; - int component1 = endPoints.Get(partition)[1].GetComponent(component); - component1 = table[component1]; - - int plane = 0; - - if (texelParams.DualPlane && (((planeIndices + 1) & 3) == component)) - { - plane = 1; - } - - int weight = weights.Get(plane)[j * blockWidth + i]; - int finalComponent = (component0 * (64 - weight) + component1 * weight + 32) / 64; - - if (finalComponent == 65535) - { - pixel.SetComponent(component, 255); - } - else - { - double finalComponentFloat = finalComponent; - pixel.SetComponent(component, (int)(255.0 * (finalComponentFloat / 65536.0) + 0.5)); - } - } - - outputBuffer[j * blockWidth + i] = pixel.Pack(); - } - } - - return true; - } - - // Blocks can be at most 12x12, so we can have as many as 144 weights - [StructLayout(LayoutKind.Sequential, Size = 144 * sizeof(int) * Count)] - private struct Weights - { - private int _start; - - public const int Count = 2; - - public Span<int> this[int index] - { - get - { - if ((uint)index >= Count) - { - throw new ArgumentOutOfRangeException(); - } - - ref int start = ref Unsafe.Add(ref _start, index * 144); - - return MemoryMarshal.CreateSpan(ref start, 144); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Span<int> Get(int index) - { - ref int start = ref Unsafe.Add(ref _start, index * 144); - - return MemoryMarshal.CreateSpan(ref start, 144); - } - } - - private static int Select2dPartition(int seed, int x, int y, int 
partitionCount, bool isSmallBlock) - { - return SelectPartition(seed, x, y, 0, partitionCount, isSmallBlock); - } - - private static int SelectPartition(int seed, int x, int y, int z, int partitionCount, bool isSmallBlock) - { - if (partitionCount == 1) - { - return 0; - } - - if (isSmallBlock) - { - x <<= 1; - y <<= 1; - z <<= 1; - } - - seed += (partitionCount - 1) * 1024; - - int rightNum = Hash52((uint)seed); - byte seed01 = (byte)(rightNum & 0xF); - byte seed02 = (byte)((rightNum >> 4) & 0xF); - byte seed03 = (byte)((rightNum >> 8) & 0xF); - byte seed04 = (byte)((rightNum >> 12) & 0xF); - byte seed05 = (byte)((rightNum >> 16) & 0xF); - byte seed06 = (byte)((rightNum >> 20) & 0xF); - byte seed07 = (byte)((rightNum >> 24) & 0xF); - byte seed08 = (byte)((rightNum >> 28) & 0xF); - byte seed09 = (byte)((rightNum >> 18) & 0xF); - byte seed10 = (byte)((rightNum >> 22) & 0xF); - byte seed11 = (byte)((rightNum >> 26) & 0xF); - byte seed12 = (byte)(((rightNum >> 30) | (rightNum << 2)) & 0xF); - - seed01 *= seed01; seed02 *= seed02; - seed03 *= seed03; seed04 *= seed04; - seed05 *= seed05; seed06 *= seed06; - seed07 *= seed07; seed08 *= seed08; - seed09 *= seed09; seed10 *= seed10; - seed11 *= seed11; seed12 *= seed12; - - int seedHash1, seedHash2, seedHash3; - - if ((seed & 1) != 0) - { - seedHash1 = (seed & 2) != 0 ? 4 : 5; - seedHash2 = (partitionCount == 3) ? 6 : 5; - } - else - { - seedHash1 = (partitionCount == 3) ? 6 : 5; - seedHash2 = (seed & 2) != 0 ? 4 : 5; - } - - seedHash3 = (seed & 0x10) != 0 ? 
seedHash1 : seedHash2; - - seed01 >>= seedHash1; seed02 >>= seedHash2; seed03 >>= seedHash1; seed04 >>= seedHash2; - seed05 >>= seedHash1; seed06 >>= seedHash2; seed07 >>= seedHash1; seed08 >>= seedHash2; - seed09 >>= seedHash3; seed10 >>= seedHash3; seed11 >>= seedHash3; seed12 >>= seedHash3; - - int a = seed01 * x + seed02 * y + seed11 * z + (rightNum >> 14); - int b = seed03 * x + seed04 * y + seed12 * z + (rightNum >> 10); - int c = seed05 * x + seed06 * y + seed09 * z + (rightNum >> 6); - int d = seed07 * x + seed08 * y + seed10 * z + (rightNum >> 2); - - a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F; - - if (partitionCount < 4) d = 0; - if (partitionCount < 3) c = 0; - - if (a >= b && a >= c && a >= d) return 0; - else if (b >= c && b >= d) return 1; - else if (c >= d) return 2; - return 3; - } - - static int Hash52(uint val) - { - val ^= val >> 15; val -= val << 17; val += val << 7; val += val << 4; - val ^= val >> 5; val += val << 16; val ^= val >> 7; val ^= val >> 3; - val ^= val << 6; val ^= val >> 17; - - return (int)val; - } - - static void UnquantizeTexelWeights( - ref Weights outputBuffer, - ref IntegerSequence weights, - ref TexelWeightParams texelParams, - int blockWidth, - int blockHeight) - { - int weightIndices = 0; - Weights unquantized; - unsafe { _ = &unquantized; } // Skip struct initialization - - Span<IntegerEncoded> weightsList = weights.List; - Span<int> unquantized0 = unquantized[0]; - Span<int> unquantized1 = unquantized[1]; - - for (int i = 0; i < weightsList.Length; i++) - { - unquantized0[weightIndices] = UnquantizeTexelWeight(weightsList[i]); - - if (texelParams.DualPlane) - { - i++; - unquantized1[weightIndices] = UnquantizeTexelWeight(weightsList[i]); - - if (i == weightsList.Length) - { - break; - } - } - - if (++weightIndices >= texelParams.Width * texelParams.Height) break; - } - - // Do infill if necessary (Section C.2.18) ... 
- int ds = (1024 + blockWidth / 2) / (blockWidth - 1); - int dt = (1024 + blockHeight / 2) / (blockHeight - 1); - - int planeScale = texelParams.DualPlane ? 2 : 1; - - for (int plane = 0; plane < planeScale; plane++) - { - Span<int> unquantizedSpan = unquantized.Get(plane); - Span<int> outputSpan = outputBuffer.Get(plane); - - for (int t = 0; t < blockHeight; t++) - { - for (int s = 0; s < blockWidth; s++) - { - int cs = ds * s; - int ct = dt * t; - - int gs = (cs * (texelParams.Width - 1) + 32) >> 6; - int gt = (ct * (texelParams.Height - 1) + 32) >> 6; - - int js = gs >> 4; - int fs = gs & 0xF; - - int jt = gt >> 4; - int ft = gt & 0x0F; - - int w11 = (fs * ft + 8) >> 4; - - int v0 = js + jt * texelParams.Width; - - int weight = 8; - - int wxh = texelParams.Width * texelParams.Height; - - if (v0 < wxh) - { - weight += unquantizedSpan[v0] * (16 - fs - ft + w11); - - if (v0 + 1 < wxh) - { - weight += unquantizedSpan[v0 + 1] * (fs - w11); - } - } - - if (v0 + texelParams.Width < wxh) - { - weight += unquantizedSpan[v0 + texelParams.Width] * (ft - w11); - - if (v0 + texelParams.Width + 1 < wxh) - { - weight += unquantizedSpan[v0 + texelParams.Width + 1] * w11; - } - } - - outputSpan[t * blockWidth + s] = weight >> 4; - } - } - } - } - - static int UnquantizeTexelWeight(IntegerEncoded intEncoded) - { - int bitValue = intEncoded.BitValue; - int bitLength = intEncoded.NumberBits; - - int a = Bits.Replicate1_7(bitValue & 1); - int b = 0, c = 0, d = 0; - - int result = 0; - - switch (intEncoded.GetEncoding()) - { - case IntegerEncoded.EIntegerEncoding.JustBits: - result = Bits.Replicate(bitValue, bitLength, 6); - break; - - case IntegerEncoded.EIntegerEncoding.Trit: - { - d = intEncoded.TritValue; - Debug.Assert(d < 3); - - switch (bitLength) - { - case 0: - { - result = d switch - { - 0 => 0, - 1 => 32, - 2 => 63, - _ => 0 - }; - - break; - } - - case 1: - { - c = 50; - break; - } - - case 2: - { - c = 23; - int b2 = (bitValue >> 1) & 1; - b = (b2 << 6) | (b2 << 2) | b2; 
- - break; - } - - case 3: - { - c = 11; - int cb = (bitValue >> 1) & 3; - b = (cb << 5) | cb; - - break; - } - - default: - throw new AstcDecoderException("Invalid trit encoding for texel weight."); - } - - break; - } - - case IntegerEncoded.EIntegerEncoding.Quint: - { - d = intEncoded.QuintValue; - Debug.Assert(d < 5); - - switch (bitLength) - { - case 0: - { - result = d switch - { - 0 => 0, - 1 => 16, - 2 => 32, - 3 => 47, - 4 => 63, - _ => 0 - }; - - break; - } - - case 1: - { - c = 28; - - break; - } - - case 2: - { - c = 13; - int b2 = (bitValue >> 1) & 1; - b = (b2 << 6) | (b2 << 1); - - break; - } - - default: - throw new AstcDecoderException("Invalid quint encoding for texel weight."); - } - - break; - } - } - - if (intEncoded.GetEncoding() != IntegerEncoded.EIntegerEncoding.JustBits && bitLength > 0) - { - // Decode the value... - result = d * c + b; - result ^= a; - result = (a & 0x20) | (result >> 2); - } - - Debug.Assert(result < 64); - - // Change from [0,63] to [0,64] - if (result > 32) - { - result += 1; - } - - return result; - } - - static byte ReverseByte(byte b) - { - // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits - return (byte)((((b) * 0x80200802L) & 0x0884422110L) * 0x0101010101L >> 32); - } - - static Span<uint> ReadUintColorValues(int number, Span<int> colorValues, ref int colorValuesPosition) - { - Span<int> ret = colorValues.Slice(colorValuesPosition, number); - - colorValuesPosition += number; - - return MemoryMarshal.Cast<int, uint>(ret); - } - - static Span<int> ReadIntColorValues(int number, Span<int> colorValues, ref int colorValuesPosition) - { - Span<int> ret = colorValues.Slice(colorValuesPosition, number); - - colorValuesPosition += number; - - return ret; - } - - static void ComputeEndpoints( - Span<AstcPixel> endPoints, - Span<int> colorValues, - uint colorEndpointMode, - ref int colorValuesPosition) - { - switch (colorEndpointMode) - { - case 0: - { - Span<uint> val = 
ReadUintColorValues(2, colorValues, ref colorValuesPosition); - - endPoints[0] = new AstcPixel(0xFF, (short)val[0], (short)val[0], (short)val[0]); - endPoints[1] = new AstcPixel(0xFF, (short)val[1], (short)val[1], (short)val[1]); - - break; - } - - - case 1: - { - Span<uint> val = ReadUintColorValues(2, colorValues, ref colorValuesPosition); - int l0 = (int)((val[0] >> 2) | (val[1] & 0xC0)); - int l1 = (int)Math.Max(l0 + (val[1] & 0x3F), 0xFFU); - - endPoints[0] = new AstcPixel(0xFF, (short)l0, (short)l0, (short)l0); - endPoints[1] = new AstcPixel(0xFF, (short)l1, (short)l1, (short)l1); - - break; - } - - case 4: - { - Span<uint> val = ReadUintColorValues(4, colorValues, ref colorValuesPosition); - - endPoints[0] = new AstcPixel((short)val[2], (short)val[0], (short)val[0], (short)val[0]); - endPoints[1] = new AstcPixel((short)val[3], (short)val[1], (short)val[1], (short)val[1]); - - break; - } - - case 5: - { - Span<int> val = ReadIntColorValues(4, colorValues, ref colorValuesPosition); - - Bits.BitTransferSigned(ref val[1], ref val[0]); - Bits.BitTransferSigned(ref val[3], ref val[2]); - - endPoints[0] = new AstcPixel((short)val[2], (short)val[0], (short)val[0], (short)val[0]); - endPoints[1] = new AstcPixel((short)(val[2] + val[3]), (short)(val[0] + val[1]), (short)(val[0] + val[1]), (short)(val[0] + val[1])); - - endPoints[0].ClampByte(); - endPoints[1].ClampByte(); - - break; - } - - case 6: - { - Span<uint> val = ReadUintColorValues(4, colorValues, ref colorValuesPosition); - - endPoints[0] = new AstcPixel(0xFF, (short)(val[0] * val[3] >> 8), (short)(val[1] * val[3] >> 8), (short)(val[2] * val[3] >> 8)); - endPoints[1] = new AstcPixel(0xFF, (short)val[0], (short)val[1], (short)val[2]); - - break; - } - - case 8: - { - Span<uint> val = ReadUintColorValues(6, colorValues, ref colorValuesPosition); - - if (val[1] + val[3] + val[5] >= val[0] + val[2] + val[4]) - { - endPoints[0] = new AstcPixel(0xFF, (short)val[0], (short)val[2], (short)val[4]); - endPoints[1] = 
new AstcPixel(0xFF, (short)val[1], (short)val[3], (short)val[5]); - } - else - { - endPoints[0] = AstcPixel.BlueContract(0xFF, (short)val[1], (short)val[3], (short)val[5]); - endPoints[1] = AstcPixel.BlueContract(0xFF, (short)val[0], (short)val[2], (short)val[4]); - } - - break; - } - - case 9: - { - Span<int> val = ReadIntColorValues(6, colorValues, ref colorValuesPosition); - - Bits.BitTransferSigned(ref val[1], ref val[0]); - Bits.BitTransferSigned(ref val[3], ref val[2]); - Bits.BitTransferSigned(ref val[5], ref val[4]); - - if (val[1] + val[3] + val[5] >= 0) - { - endPoints[0] = new AstcPixel(0xFF, (short)val[0], (short)val[2], (short)val[4]); - endPoints[1] = new AstcPixel(0xFF, (short)(val[0] + val[1]), (short)(val[2] + val[3]), (short)(val[4] + val[5])); - } - else - { - endPoints[0] = AstcPixel.BlueContract(0xFF, val[0] + val[1], val[2] + val[3], val[4] + val[5]); - endPoints[1] = AstcPixel.BlueContract(0xFF, val[0], val[2], val[4]); - } - - endPoints[0].ClampByte(); - endPoints[1].ClampByte(); - - break; - } - - case 10: - { - Span<uint> val = ReadUintColorValues(6, colorValues, ref colorValuesPosition); - - endPoints[0] = new AstcPixel((short)val[4], (short)(val[0] * val[3] >> 8), (short)(val[1] * val[3] >> 8), (short)(val[2] * val[3] >> 8)); - endPoints[1] = new AstcPixel((short)val[5], (short)val[0], (short)val[1], (short)val[2]); - - break; - } - - case 12: - { - Span<uint> val = ReadUintColorValues(8, colorValues, ref colorValuesPosition); - - if (val[1] + val[3] + val[5] >= val[0] + val[2] + val[4]) - { - endPoints[0] = new AstcPixel((short)val[6], (short)val[0], (short)val[2], (short)val[4]); - endPoints[1] = new AstcPixel((short)val[7], (short)val[1], (short)val[3], (short)val[5]); - } - else - { - endPoints[0] = AstcPixel.BlueContract((short)val[7], (short)val[1], (short)val[3], (short)val[5]); - endPoints[1] = AstcPixel.BlueContract((short)val[6], (short)val[0], (short)val[2], (short)val[4]); - } - - break; - } - - case 13: - { - Span<int> val = 
ReadIntColorValues(8, colorValues, ref colorValuesPosition); - - Bits.BitTransferSigned(ref val[1], ref val[0]); - Bits.BitTransferSigned(ref val[3], ref val[2]); - Bits.BitTransferSigned(ref val[5], ref val[4]); - Bits.BitTransferSigned(ref val[7], ref val[6]); - - if (val[1] + val[3] + val[5] >= 0) - { - endPoints[0] = new AstcPixel((short)val[6], (short)val[0], (short)val[2], (short)val[4]); - endPoints[1] = new AstcPixel((short)(val[7] + val[6]), (short)(val[0] + val[1]), (short)(val[2] + val[3]), (short)(val[4] + val[5])); - } - else - { - endPoints[0] = AstcPixel.BlueContract(val[6] + val[7], val[0] + val[1], val[2] + val[3], val[4] + val[5]); - endPoints[1] = AstcPixel.BlueContract(val[6], val[0], val[2], val[4]); - } - - endPoints[0].ClampByte(); - endPoints[1].ClampByte(); - - break; - } - - default: - throw new AstcDecoderException("Unsupported color endpoint mode (is it HDR?)"); - } - } - - static void DecodeColorValues( - Span<int> outputValues, - ref BitStream128 colorBitStream, - Span<uint> modes, - int numberPartitions, - int numberBitsForColorData) - { - // First figure out how many color values we have - int numberValues = 0; - - for (int i = 0; i < numberPartitions; i++) - { - numberValues += (int)((modes[i] >> 2) + 1) << 1; - } - - // Then based on the number of values and the remaining number of bits, - // figure out the max value for each of them... - int range = 256; - - while (--range > 0) - { - IntegerEncoded intEncoded = IntegerEncoded.CreateEncoding(range); - int bitLength = intEncoded.GetBitLength(numberValues); - - if (bitLength <= numberBitsForColorData) - { - // Find the smallest possible range that matches the given encoding - while (--range > 0) - { - IntegerEncoded newIntEncoded = IntegerEncoded.CreateEncoding(range); - if (!newIntEncoded.MatchesEncoding(intEncoded)) - { - break; - } - } - - // Return to last matching range. - range++; - break; - } - } - - // We now have enough to decode our integer sequence. 
- IntegerSequence integerEncodedSequence; - unsafe { _ = &integerEncodedSequence; } // Skip struct initialization - integerEncodedSequence.Reset(); - - IntegerEncoded.DecodeIntegerSequence(ref integerEncodedSequence, ref colorBitStream, range, numberValues); - - // Once we have the decoded values, we need to dequantize them to the 0-255 range - // This procedure is outlined in ASTC spec C.2.13 - int outputIndices = 0; - - foreach (ref IntegerEncoded intEncoded in integerEncodedSequence.List) - { - int bitLength = intEncoded.NumberBits; - int bitValue = intEncoded.BitValue; - - Debug.Assert(bitLength >= 1); - - int a = 0, b = 0, c = 0, d = 0; - // A is just the lsb replicated 9 times. - a = Bits.Replicate(bitValue & 1, 1, 9); - - switch (intEncoded.GetEncoding()) - { - case IntegerEncoded.EIntegerEncoding.JustBits: - { - outputValues[outputIndices++] = Bits.Replicate(bitValue, bitLength, 8); - - break; - } - - case IntegerEncoded.EIntegerEncoding.Trit: - { - d = intEncoded.TritValue; - - switch (bitLength) - { - case 1: - { - c = 204; - - break; - } - - case 2: - { - c = 93; - // B = b000b0bb0 - int b2 = (bitValue >> 1) & 1; - b = (b2 << 8) | (b2 << 4) | (b2 << 2) | (b2 << 1); - - break; - } - - case 3: - { - c = 44; - // B = cb000cbcb - int cb = (bitValue >> 1) & 3; - b = (cb << 7) | (cb << 2) | cb; - - break; - } - - - case 4: - { - c = 22; - // B = dcb000dcb - int dcb = (bitValue >> 1) & 7; - b = (dcb << 6) | dcb; - - break; - } - - case 5: - { - c = 11; - // B = edcb000ed - int edcb = (bitValue >> 1) & 0xF; - b = (edcb << 5) | (edcb >> 2); - - break; - } - - case 6: - { - c = 5; - // B = fedcb000f - int fedcb = (bitValue >> 1) & 0x1F; - b = (fedcb << 4) | (fedcb >> 4); - - break; - } - - default: - throw new AstcDecoderException("Unsupported trit encoding for color values."); - } - - break; - } - - case IntegerEncoded.EIntegerEncoding.Quint: - { - d = intEncoded.QuintValue; - - switch (bitLength) - { - case 1: - { - c = 113; - - break; - } - - case 2: - { - c = 
// Fills a blockWidth x blockHeight region of outputBuffer with the single color stored
// in an LDR void-extent block. Texels are written row by row with a stride of blockWidth.
static void FillVoidExtentLdr(ref BitStream128 bitStream, Span<int> outputBuffer, int blockWidth, int blockHeight)
{
    // The four 13-bit extent coordinates are not used for decoding; consume and discard them.
    for (int coordinate = 0; coordinate < 4; coordinate++)
    {
        bitStream.ReadBits(13);
    }

    // Four 16-bit channels follow, in R, G, B, A order.
    int red = (ushort)bitStream.ReadBits(16);
    int green = (ushort)bitStream.ReadBits(16);
    int blue = (ushort)bitStream.ReadBits(16);
    int alpha = (ushort)bitStream.ReadBits(16);

    // Keep the high byte of every channel: R lands in bits [7:0], G in [15:8], B in [23:16], A in [31:24].
    int packedColor = (red >> 8) | (green & 0xFF00) | ((blue & 0xFF00) << 8) | ((alpha & 0xFF00) << 16);

    for (int row = 0, rowStart = 0; row < blockHeight; row++, rowStart += blockWidth)
    {
        for (int column = 0; column < blockWidth; column++)
        {
            outputBuffer[rowStart + column] = packedColor;
        }
    }
}

// Reads the 11-bit block mode from the stream and translates it into weight grid parameters
// following table C.2.8 of the ASTC spec. Void-extent and reserved modes only set the
// VoidExtentLdr / VoidExtentHdr / Error members and return early.
static void DecodeBlockInfo(ref BitStream128 bitStream, out TexelWeightParams texelParams)
{
    texelParams = new TexelWeightParams();

    // The whole block mode is consumed with a single read.
    ushort modeBits = (ushort)bitStream.ReadBits(11);

    // Void-extent blocks have bits [8:0] equal to 1_1111_1100; bit 9 selects HDR.
    if ((modeBits & 0x01FF) == 0x1FC)
    {
        bool isHdr = (modeBits & 0x200) != 0;

        if (isHdr)
        {
            texelParams.VoidExtentHdr = true;
        }
        else
        {
            texelParams.VoidExtentLdr = true;
        }

        // Bit 10 and the next stream bit must both be one.
        // The extra bit is only consumed when bit 10 is set (short-circuit evaluation).
        if ((modeBits & 0x400) == 0 || bitStream.ReadBits(1) == 0)
        {
            texelParams.Error = true;
        }

        return;
    }

    // Reserved encodings: the low four bits are all zero, or bits [1:0] are zero
    // while bits [8:6] are all ones.
    bool lowNibbleIsZero = (modeBits & 0xF) == 0;
    bool reservedHighPattern = (modeBits & 0x3) == 0 && (modeBits & 0x1C0) == 0x1C0;

    if (lowNibbleIsZero || reservedHighPattern)
    {
        texelParams.Error = true;

        return;
    }

    // Pick the row (0-9) of table C.2.8 that matches the mode bits.
    int layout;

    if ((modeBits & 0x3) != 0)
    {
        // Rows 0-4 are selected by bits [3:2]; rows 3 and 4 are told apart by bit 8.
        switch ((modeBits >> 2) & 0x3)
        {
            case 0:
                layout = 0;
                break;
            case 1:
                layout = 1;
                break;
            case 2:
                layout = 2;
                break;
            default:
                layout = (modeBits & 0x100) != 0 ? 4 : 3;
                break;
        }
    }
    else
    {
        // Rows 5-9 are selected by bits 8 and 7 (and bit 5 for rows 7/8).
        bool bit8 = (modeBits & 0x100) != 0;
        bool bit7 = (modeBits & 0x80) != 0;

        if (!bit8)
        {
            layout = bit7 ? 6 : 5;
        }
        else if (!bit7)
        {
            layout = 9;
        }
        else
        {
            // Bit 6 set here would have matched the reserved pattern rejected above.
            Debug.Assert((modeBits & 0x40) == 0);

            layout = (modeBits & 0x20) != 0 ? 8 : 7;
        }
    }

    Debug.Assert(layout < 10);

    // R: bit 4 is the lowest bit; the upper two bits sit in [1:0] for rows 0-4 and in [3:2] otherwise.
    int r = (modeBits >> 4) & 1;
    r |= layout < 5 ? (modeBits & 0x3) << 1 : (modeBits & 0xC) >> 1;

    Debug.Assert(2 <= r && r <= 7);

    // A and B fields as they are laid out for most rows; rows 3/4 use a one-bit B
    // and row 9 stores B in bits [10:9].
    int a = (modeBits >> 5) & 0x3;
    int b = (modeBits >> 7) & 0x3;

    switch (layout)
    {
        case 0:
            texelParams.Width = b + 4;
            texelParams.Height = a + 2;
            break;

        case 1:
            texelParams.Width = b + 8;
            texelParams.Height = a + 2;
            break;

        case 2:
            texelParams.Width = a + 2;
            texelParams.Height = b + 8;
            break;

        case 3:
            texelParams.Width = a + 2;
            texelParams.Height = (b & 0x1) + 6;
            break;

        case 4:
            texelParams.Width = (b & 0x1) + 2;
            texelParams.Height = a + 2;
            break;

        case 5:
            texelParams.Width = 12;
            texelParams.Height = a + 2;
            break;

        case 6:
            texelParams.Width = a + 2;
            texelParams.Height = 12;
            break;

        case 7:
            texelParams.Width = 6;
            texelParams.Height = 10;
            break;

        case 8:
            texelParams.Width = 10;
            texelParams.Height = 6;
            break;

        case 9:
            texelParams.Width = a + 6;
            texelParams.Height = ((modeBits >> 9) & 0x3) + 6;
            break;

        default:
            // Don't know this layout...
            texelParams.Error = true;
            break;
    }

    // Row 9 reuses bits 9 and 10 for B, so it never selects dual plane or high precision.
    bool hasPlaneAndPrecisionBits = layout != 9;
    bool dualPlane = hasPlaneAndPrecisionBits && (modeBits & 0x400) != 0;
    bool highPrecision = hasPlaneAndPrecisionBits && (modeBits & 0x200) != 0;

    // Maximum weight value for r = 2..7 in each precision mode.
    ReadOnlySpan<byte> lowPrecisionMaxWeights = new byte[] { 1, 2, 3, 4, 5, 7 };
    ReadOnlySpan<byte> highPrecisionMaxWeights = new byte[] { 9, 11, 15, 19, 23, 31 };

    texelParams.MaxWeight = (highPrecision ? highPrecisionMaxWeights : lowPrecisionMaxWeights)[r - 2];
    texelParams.DualPlane = dualPlane;
}
// A single decoded texel with 16-bit signed channels, stored in A, R, G, B order.
// The field order and total size (StructSize) are part of the type's contract:
// Components views the four channel fields as one contiguous span.
[StructLayout(LayoutKind.Sequential)]
struct AstcPixel
{
    // Size of the struct in bytes: four shorts followed by one uint.
    internal const int StructSize = 12;

    public short A;
    public short R;
    public short G;
    public short B;

    // Four per-channel bit depths packed into one uint, one byte each.
    private uint _bitDepthInt;

    private Span<byte> BitDepth => MemoryMarshal.CreateSpan(ref Unsafe.As<uint, byte>(ref _bitDepthInt), 4);

    // View over A, R, G, B (indices 0..3), relying on the sequential field layout.
    private Span<short> Components => MemoryMarshal.CreateSpan(ref A, 4);

    // Creates a pixel from its four channels; every channel's bit depth is initialised to 8.
    public AstcPixel(short a, short r, short g, short b)
    {
        _bitDepthInt = 0x08080808;

        A = a;
        R = r;
        G = g;
        B = b;
    }

    // Clamps every channel to the [0, 255] range.
    public void ClampByte()
    {
        foreach (ref short component in Components)
        {
            component = Math.Clamp(component, (short)0, (short)255);
        }
    }

    // Returns the channel at the given index (0 = A, 1 = R, 2 = G, 3 = B).
    public short GetComponent(int index) => Components[index];

    // Stores value, truncated to 16 bits, into the channel at the given index (0 = A, 1 = R, 2 = G, 3 = B).
    public void SetComponent(int index, int value) => Components[index] = (short)value;

    // Packs the channels into one int with R in the lowest byte and A in the highest.
    // Channels are not masked, so callers are expected to clamp first.
    public int Pack()
    {
        int alpha = A << 24;
        int blue = B << 16;
        int green = G << 8;
        int red = R;

        return alpha | blue | green | red;
    }

    // Adds more precision to the blue channel as described in C.2.14:
    // red and green are averaged with blue, alpha and blue pass through.
    public static AstcPixel BlueContract(int a, int r, int g, int b)
    {
        short contractedRed = (short)((r + b) >> 1);
        short contractedGreen = (short)((g + b) >> 1);

        return new AstcPixel((short)a, contractedRed, contractedGreen, (short)b);
    }
}
// A 128-bit buffer accessed as a bit stream, least significant bits first.
// ReadBits consumes bits from the bottom of the buffer; WriteBits ORs bits in at
// bit position BitsLeft. Both adjust BitsLeft by the number of bits handled.
public struct BitStream128
{
    private Buffer16 _data;

    // Decremented by ReadBits and incremented by WriteBits; starts at 128.
    public int BitsLeft { get; set; }

    public BitStream128(Buffer16 data)
    {
        BitsLeft = 128;
        _data = data;
    }

    // Returns the lowest bitCount bits (bitCount must be below 32) and shifts the
    // whole 128-bit buffer right by that amount.
    public int ReadBits(int bitCount)
    {
        Debug.Assert(bitCount < 32);

        if (bitCount == 0)
        {
            return 0;
        }

        int result = _data.As<int>() & ((1 << bitCount) - 1);

        Span<ulong> words = _data.AsSpan<ulong>();
        ulong low = words[0];
        ulong high = words[1];

        // The bits leaving the high word enter the top of the low word.
        words[0] = (low >> bitCount) | (high << (64 - bitCount));
        words[1] = high >> bitCount;

        BitsLeft -= bitCount;

        return result;
    }

    // ORs the lowest bitCount bits of value (bitCount must be below 32) into the
    // buffer at bit position BitsLeft, then advances BitsLeft.
    public void WriteBits(int value, int bitCount)
    {
        Debug.Assert(bitCount < 32);

        if (bitCount == 0)
        {
            return;
        }

        ulong bits = (uint)(value & ((1 << bitCount) - 1));
        int position = BitsLeft;

        Span<ulong> words = _data.AsSpan<ulong>();

        if (position < 64)
        {
            words[0] |= bits << position;
        }

        if (position + bitCount > 64)
        {
            // Either the write starts inside the high word, or it straddles both
            // words and only the part that overflowed the low word goes here.
            words[1] |= position > 64
                ? bits << (position - 64)
                : bits >> (64 - position);
        }

        BitsLeft = position + bitCount;
    }
}
// Bit manipulation helpers used by the ASTC decoder.
internal static class Bits
{
    // Precomputed results of Replicate(i, 8, 16) and Replicate(i, 1, 7) for i in [0, 0x200).
    public static readonly ushort[] Replicate8_16Table;
    public static readonly byte[] Replicate1_7Table;

    static Bits()
    {
        const int TableSize = 0x200;

        ushort[] replicate8To16 = new ushort[TableSize];
        byte[] replicate1To7 = new byte[TableSize];

        for (int index = 0; index < TableSize; index++)
        {
            replicate8To16[index] = (ushort)Replicate(index, 8, 16);
            replicate1To7[index] = (byte)Replicate(index, 1, 7);
        }

        Replicate8_16Table = replicate8To16;
        Replicate1_7Table = replicate1To7;
    }

    // Table lookup for Replicate(value, 8, 16); value must be a valid table index.
    public static int Replicate8_16(int value) => Replicate8_16Table[value];

    // Table lookup for Replicate(value, 1, 7); value must be a valid table index.
    public static int Replicate1_7(int value) => Replicate1_7Table[value];

    // Repeats the low numberBits bits of value, most significant bits first, until
    // toBit bits have been produced; the last copy is cut short if it does not fit.
    // Returns 0 when either width is 0. When numberBits is already at least toBit
    // the masked value is returned as is.
    public static int Replicate(int value, int numberBits, int toBit)
    {
        if (numberBits == 0 || toBit == 0)
        {
            return 0;
        }

        int pattern = value & ((1 << numberBits) - 1);
        int result = pattern;
        int filled = numberBits;

        while (filled < toBit)
        {
            // Take a whole copy if it fits, otherwise only the top bits of the pattern.
            int room = toBit - filled;
            int take = numberBits > room ? room : numberBits;

            result = (result << take) | (pattern >> (numberBits - take));
            filled += take;
        }

        return result;
    }

    // Transfers a bit as described in C.2.14: bit 7 of a becomes bit 7 of b (after b is
    // shifted right by one), and a becomes a signed 6-bit value taken from its bits [6:1].
    public static void BitTransferSigned(ref int a, ref int b)
    {
        b >>= 1;
        b |= a & 0x80;

        a = (a >> 1) & 0x3F;

        // Sign-extend from bit 5: subtract 0x40 when that bit is set.
        a -= (a & 0x20) << 1;
    }
}
// Inline storage sized for eight AstcPixel values, handed out as four two-pixel spans.
[StructLayout(LayoutKind.Sequential, Size = AstcPixel.StructSize * 8)]
internal struct EndPointSet
{
    // First pixel of the storage; the remaining ones live in the padding requested by Size.
    private AstcPixel _start;

    // Returns the two-pixel span at the given index (must be below 4).
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Span<AstcPixel> Get(int index)
    {
        Debug.Assert(index < 4);

        return MemoryMarshal.CreateSpan(ref Unsafe.Add(ref _start, index * 2), 2);
    }
}

// One value of an ASTC bounded integer sequence: the encoding kind, the number of
// plain bits per value, the plain bits themselves and the accompanying trit or quint.
internal struct IntegerEncoded
{
    internal const int StructSize = 8;

    // Encoding parameters for every maximum value in [0, 0x100), filled in once by the static constructor.
    private static readonly IntegerEncoded[] Encodings;

    public enum EIntegerEncoding : byte
    {
        JustBits,
        Quint,
        Trit
    }

    EIntegerEncoding _encoding;
    public byte NumberBits { get; private set; }
    public byte TritValue { get; private set; }
    public byte QuintValue { get; private set; }
    public int BitValue { get; private set; }

    static IntegerEncoded()
    {
        IntegerEncoded[] table = new IntegerEncoded[0x100];

        for (int maxVal = 0; maxVal < table.Length; maxVal++)
        {
            table[maxVal] = CreateEncodingCalc(maxVal);
        }

        Encodings = table;
    }

    // Creates an entry with the given encoding and bit count; all values start at zero.
    public IntegerEncoded(EIntegerEncoding encoding, int numBits)
    {
        _encoding = encoding;
        NumberBits = (byte)numBits;
        TritValue = 0;
        QuintValue = 0;
        BitValue = 0;
    }

    // True when both entries use the same encoding kind and the same number of bits.
    public bool MatchesEncoding(IntegerEncoded other)
    {
        return NumberBits == other.NumberBits && _encoding == other._encoding;
    }

    public EIntegerEncoding GetEncoding() => _encoding;

    // Number of bits numberVals values occupy with this encoding: the plain bits plus,
    // for trits, 8 bits per 5 values and, for quints, 7 bits per 3 values (rounded up).
    public int GetBitLength(int numberVals)
    {
        int plainBits = NumberBits * numberVals;

        switch (_encoding)
        {
            case EIntegerEncoding.Trit:
                return plainBits + (numberVals * 8 + 4) / 5;

            case EIntegerEncoding.Quint:
                return plainBits + (numberVals * 7 + 2) / 3;

            default:
                return plainBits;
        }
    }

    // Looks up the precomputed encoding for the given maximum value (must be below 0x100).
    public static IntegerEncoded CreateEncoding(int maxVal)
    {
        return Encodings[maxVal];
    }

    // Finds the encoding for the largest representable maximum that does not exceed maxVal.
    private static IntegerEncoded CreateEncodingCalc(int maxVal)
    {
        for (int candidate = maxVal; candidate > 0; candidate--)
        {
            int valueCount = candidate + 1;

            // valueCount == 2^n: plain bits only.
            if ((valueCount & (valueCount - 1)) == 0)
            {
                return new IntegerEncoded(EIntegerEncoding.JustBits, BitOperations.PopCount((uint)candidate));
            }

            // valueCount == 3 * 2^n: one trit plus n bits.
            if (valueCount % 3 == 0)
            {
                int powerOfTwo = valueCount / 3;

                if ((powerOfTwo & (powerOfTwo - 1)) == 0)
                {
                    return new IntegerEncoded(EIntegerEncoding.Trit, BitOperations.PopCount((uint)(powerOfTwo - 1)));
                }
            }

            // valueCount == 5 * 2^n: one quint plus n bits.
            if (valueCount % 5 == 0)
            {
                int powerOfTwo = valueCount / 5;

                if ((powerOfTwo & (powerOfTwo - 1)) == 0)
                {
                    return new IntegerEncoded(EIntegerEncoding.Quint, BitOperations.PopCount((uint)(powerOfTwo - 1)));
                }
            }

            // This maximum has no bounded integer sequence encoding; try the next smaller one.
        }

        return new IntegerEncoded(EIntegerEncoding.JustBits, 0);
    }

    // Decodes one block of five trit-encoded values (section C.2.12) and appends them to the sequence.
    public static void DecodeTritBlock(
        ref BitStream128 bitStream,
        ref IntegerSequence listIntegerEncoded,
        int numberBitsPerValue)
    {
        // The packed trit code is split into chunks of 2, 2, 1, 2 and 1 bits,
        // one chunk after each value's plain bits.
        ReadOnlySpan<byte> packedChunkSizes = new byte[] { 2, 2, 1, 2, 1 };

        Span<int> plainBits = stackalloc int[5];
        int packedTrits = 0;
        int packedShift = 0;

        for (int i = 0; i < plainBits.Length; i++)
        {
            plainBits[i] = bitStream.ReadBits(numberBitsPerValue);

            packedTrits |= bitStream.ReadBits(packedChunkSizes[i]) << packedShift;
            packedShift += packedChunkSizes[i];
        }

        ReadOnlySpan<byte> trits = GetTritEncoding(packedTrits);

        for (int i = 0; i < 5; i++)
        {
            IntegerEncoded entry = new IntegerEncoded(EIntegerEncoding.Trit, numberBitsPerValue)
            {
                BitValue = plainBits[i],
                TritValue = trits[i]
            };

            listIntegerEncoded.Add(ref entry);
        }
    }

    // Decodes one block of three quint-encoded values (section C.2.12) and appends them to the sequence.
    public static void DecodeQuintBlock(
        ref BitStream128 bitStream,
        ref IntegerSequence listIntegerEncoded,
        int numberBitsPerValue)
    {
        Span<int> plainBits = stackalloc int[3];

        // The packed quint code is split into chunks of 3, 2 and 2 bits,
        // one chunk after each value's plain bits.
        plainBits[0] = bitStream.ReadBits(numberBitsPerValue);
        int packedQuints = bitStream.ReadBits(3);
        plainBits[1] = bitStream.ReadBits(numberBitsPerValue);
        packedQuints |= bitStream.ReadBits(2) << 3;
        plainBits[2] = bitStream.ReadBits(numberBitsPerValue);
        packedQuints |= bitStream.ReadBits(2) << 5;

        ReadOnlySpan<byte> quints = GetQuintEncoding(packedQuints);

        IntegerEncoded entry = new IntegerEncoded(EIntegerEncoding.Quint, numberBitsPerValue);

        for (int i = 0; i < 3; i++)
        {
            entry.BitValue = plainBits[i];
            entry.QuintValue = quints[i];

            listIntegerEncoded.Add(ref entry);
        }
    }

    // Decodes at least numberValues values with the encoding selected by maxRange and
    // appends them to decodeIntegerSequence. Trit and quint encodings decode whole
    // blocks of five and three values, so more than numberValues entries may be appended.
    public static void DecodeIntegerSequence(
        ref IntegerSequence decodeIntegerSequence,
        ref BitStream128 bitStream,
        int maxRange,
        int numberValues)
    {
        // Determine encoding parameters
        IntegerEncoded encoding = CreateEncoding(maxRange);
        int bitsPerValue = encoding.NumberBits;

        switch (encoding.GetEncoding())
        {
            case EIntegerEncoding.Quint:
                for (int decoded = 0; decoded < numberValues; decoded += 3)
                {
                    DecodeQuintBlock(ref bitStream, ref decodeIntegerSequence, bitsPerValue);
                }

                break;

            case EIntegerEncoding.Trit:
                for (int decoded = 0; decoded < numberValues; decoded += 5)
                {
                    DecodeTritBlock(ref bitStream, ref decodeIntegerSequence, bitsPerValue);
                }

                break;

            case EIntegerEncoding.JustBits:
                for (int decoded = 0; decoded < numberValues; decoded++)
                {
                    encoding.BitValue = bitStream.ReadBits(bitsPerValue);
                    decodeIntegerSequence.Add(ref encoding);
                }

                break;
        }
    }

    // Five trits for the given 8-bit packed code.
    private static ReadOnlySpan<byte> GetTritEncoding(int index)
    {
        return TritEncodings.Slice(index * 5, 5);
    }

    // Three quints for the given 7-bit packed code.
    private static ReadOnlySpan<byte> GetQuintEncoding(int index)
    {
        return QuintEncodings.Slice(index * 3, 3);
    }

    // 256 entries of five trits each, indexed by the packed trit code.
    private static ReadOnlySpan<byte> TritEncodings => new byte[]
    {
        0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0,
        0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,
        2, 1, 0, 0, 0, 1, 0, 2, 0, 0, 0, 2, 0, 0, 0,
        1, 2, 0, 0, 0, 2, 2, 0, 0, 0, 2, 0, 2, 0, 0,
        0, 2, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0, 0,
        2, 0, 2, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
        2, 0, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 1, 0, 0,
        1, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1, 1, 2, 0, 0,
        0, 2, 1, 0, 0, 1, 2, 1, 0, 0, 2, 2, 1, 0, 0,
        2, 1, 2, 0, 0, 0, 0, 0, 2, 2, 1, 0, 0, 2, 2,
        2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 0, 0, 0, 1, 0,
        1, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 0, 2, 1, 0,
        0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0,
        1, 0, 2, 1, 0, 0, 2, 0, 1, 0, 1, 2, 0, 1, 0,
        2, 2, 0, 1, 0, 2, 0, 2, 1, 0, 0, 2, 2, 1, 0,
        1, 2, 2, 1, 0, 2, 2, 2, 1, 0, 2, 0, 2, 1, 0,
        0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 2, 0, 1, 1, 0,
        0, 1, 2, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
        2, 1, 1, 1, 0, 1, 1, 2, 1, 0, 0, 2, 1, 1, 0,
        1, 2, 1, 1, 0, 2, 2, 1, 1, 0, 2, 1, 2, 1, 0,
        0, 1, 0, 2, 2, 1, 1, 0, 2, 2, 2, 1, 0, 2, 2,
        1, 0, 2, 2, 2, 0, 0, 0, 2, 0, 1, 0, 0, 2, 0,
        2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0, 1, 0, 2, 0,
        1, 1, 0, 2, 0, 2, 1, 0, 2, 0, 1, 0, 2, 2, 0,
        0, 2, 0, 2, 0, 1, 2, 0, 2, 0, 2, 2, 0, 2, 0,
        2, 0, 2, 2, 0, 0, 2, 2, 2, 0, 1, 2, 2, 2, 0,
        2, 2, 2, 2, 0, 2, 0, 2, 2, 0, 0, 0, 1, 2, 0,
        1, 0, 1, 2, 0, 2, 0, 1, 2, 0, 0, 1, 2, 2, 0,
        0, 1, 1, 2, 0, 1, 1, 1, 2, 0, 2, 1, 1, 2, 0,
        1, 1, 2, 2, 0, 0, 2, 1, 2, 0, 1, 2, 1, 2, 0,
        2, 2, 1, 2, 0, 2, 1, 2, 2, 0, 0, 2, 0, 2, 2,
        1, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2,
        0, 0, 0, 0, 2, 1, 0, 0, 0, 2, 2, 0, 0, 0, 2,
        0, 0, 2, 0, 2, 0, 1, 0, 0, 2, 1, 1, 0, 0, 2,
        2, 1, 0, 0, 2, 1, 0, 2, 0, 2, 0, 2, 0, 0, 2,
        1, 2, 0, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2, 0, 2,
        0, 2, 2, 0, 2, 1, 2, 2, 0, 2, 2, 2, 2, 0, 2,
        2, 0, 2, 0, 2, 0, 0, 1, 0, 2, 1, 0, 1, 0, 2,
        2, 0, 1, 0, 2, 0, 1, 2, 0, 2, 0, 1, 1, 0, 2,
        1, 1, 1, 0, 2, 2, 1, 1, 0, 2, 1, 1, 2, 0, 2,
        0, 2, 1, 0, 2, 1, 2, 1, 0, 2, 2, 2, 1, 0, 2,
        2, 1, 2, 0, 2, 0, 2, 2, 2, 2, 1, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1,
        1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 2, 0, 1,
        0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 2, 1, 0, 0, 1,
        1, 0, 2, 0, 1, 0, 2, 0, 0, 1, 1, 2, 0, 0, 1,
        2, 2, 0, 0, 1, 2, 0, 2, 0, 1, 0, 2, 2, 0, 1,
        1, 2, 2, 0, 1, 2, 2, 2, 0, 1, 2, 0, 2, 0, 1,
        0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 2, 0, 1, 0, 1,
        0, 1, 2, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1,
        2, 1, 1, 0, 1, 1, 1, 2, 0, 1, 0, 2, 1, 0, 1,
        1, 2, 1, 0, 1, 2, 2, 1, 0, 1, 2, 1, 2, 0, 1,
        0, 0, 1, 2, 2, 1, 0, 1, 2, 2, 2, 0, 1, 2, 2,
        0, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
        2, 0, 0, 1, 1, 0, 0, 2, 1, 1, 0, 1, 0, 1, 1,
        1, 1, 0, 1, 1, 2, 1, 0, 1, 1, 1, 0, 2, 1, 1,
        0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 1,
        2, 0, 2, 1, 1, 0, 2, 2, 1, 1, 1, 2, 2, 1, 1,
        2, 2, 2, 1, 1, 2, 0, 2, 1, 1, 0, 0, 1, 1, 1,
        1, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 1, 1,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
        1, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 1, 1, 1,
        2, 2, 1, 1, 1, 2, 1, 2, 1, 1, 0, 1, 1, 2, 2,
        1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2,
        0, 0, 0, 2, 1, 1, 0, 0, 2, 1, 2, 0, 0, 2, 1,
        0, 0, 2, 2, 1, 0, 1, 0, 2, 1, 1, 1, 0, 2, 1,
        2, 1, 0, 2, 1, 1, 0, 2, 2, 1, 0, 2, 0, 2, 1,
        1, 2, 0, 2, 1, 2, 2, 0, 2, 1, 2, 0, 2, 2, 1,
        0, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1,
        2, 0, 2, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 2, 1,
        2, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 1, 1, 2, 1,
        1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 2, 2, 1,
        0, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1,
        2, 1, 2, 2, 1, 0, 2, 1, 2, 2, 1, 2, 1, 2, 2,
        2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 0, 0, 0, 1, 2,
        1, 0, 0, 1, 2, 2, 0, 0, 1, 2, 0, 0, 2, 1, 2,
        0, 1, 0, 1, 2, 1, 1, 0, 1, 2, 2, 1, 0, 1, 2,
        1, 0, 2, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 2,
        2, 2, 0, 1, 2, 2, 0, 2, 1, 2, 0, 2, 2, 1, 2,
        1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 0, 2, 1, 2,
        0, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 0, 1, 1, 2,
        0, 1, 2, 1, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 2,
        2, 1, 1, 1, 2, 1, 1, 2, 1, 2, 0, 2, 1, 1, 2,
        1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2,
        0, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 1, 2, 2, 2
    };

    // 128 entries of three quints each, indexed by the packed quint code.
    private static ReadOnlySpan<byte> QuintEncodings => new byte[]
    {
        0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 4, 0, 0,
        0, 4, 0, 4, 4, 0, 4, 4, 4, 0, 1, 0, 1, 1, 0,
        2, 1, 0, 3, 1, 0, 4, 1, 0, 1, 4, 0, 4, 4, 1,
        4, 4, 4, 0, 2, 0, 1, 2, 0, 2, 2, 0, 3, 2, 0,
        4, 2, 0, 2, 4, 0, 4, 4, 2, 4, 4, 4, 0, 3, 0,
        1, 3, 0, 2, 3, 0, 3, 3, 0, 4, 3, 0, 3, 4, 0,
        4, 4, 3, 4, 4, 4, 0, 0, 1, 1, 0, 1, 2, 0, 1,
        3, 0, 1, 4, 0, 1, 0, 4, 1, 4, 0, 4, 0, 4, 4,
        0, 1, 1, 1, 1, 1, 2, 1, 1, 3, 1, 1, 4, 1, 1,
        1, 4, 1, 4, 1, 4, 1, 4, 4, 0, 2, 1, 1, 2, 1,
        2, 2, 1, 3, 2, 1, 4, 2, 1, 2, 4, 1, 4, 2, 4,
        2, 4, 4, 0, 3, 1, 1, 3, 1, 2, 3, 1, 3, 3, 1,
        4, 3, 1, 3, 4, 1, 4, 3, 4, 3, 4, 4, 0, 0, 2,
        1, 0, 2, 2, 0, 2, 3, 0, 2, 4, 0, 2, 0, 4, 2,
        2, 0, 4, 3, 0, 4, 0, 1, 2, 1, 1, 2, 2, 1, 2,
        3, 1, 2, 4, 1, 2, 1, 4, 2, 2, 1, 4, 3, 1, 4,
        0, 2, 2, 1, 2, 2, 2, 2, 2, 3, 2, 2, 4, 2, 2,
        2, 4, 2, 2, 2, 4, 3, 2, 4, 0, 3, 2, 1, 3, 2,
        2, 3, 2, 3, 3, 2, 4, 3, 2, 3, 4, 2, 2, 3, 4,
        3, 3, 4, 0, 0, 3, 1, 0, 3, 2, 0, 3, 3, 0, 3,
        4, 0, 3, 0, 4, 3, 0, 0, 4, 1, 0, 4, 0, 1, 3,
        1, 1, 3, 2, 1, 3, 3, 1, 3, 4, 1, 3, 1, 4, 3,
        0, 1, 4, 1, 1, 4, 0, 2, 3, 1, 2, 3, 2, 2, 3,
        3, 2, 3, 4, 2, 3, 2, 4, 3, 0, 2, 4, 1, 2, 4,
        0, 3, 3, 1, 3, 3, 2, 3, 3, 3, 3, 3, 4, 3, 3,
        3, 4, 3, 0, 3, 4, 1, 3, 4
    };
}
IntegerSequence - { - private const int Capacity = 100; - - private int _length; - private IntegerEncoded _start; - - public Span<IntegerEncoded> List => MemoryMarshal.CreateSpan(ref _start, _length); - - public void Reset() => _length = 0; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void Add(ref IntegerEncoded item) - { - Debug.Assert(_length < Capacity); - - int oldLength = _length; - _length++; - - List[oldLength] = item; - } - } -} diff --git a/Ryujinx.Graphics.Texture/BC6Decoder.cs b/Ryujinx.Graphics.Texture/BC6Decoder.cs deleted file mode 100644 index 819bf022..00000000 --- a/Ryujinx.Graphics.Texture/BC6Decoder.cs +++ /dev/null @@ -1,819 +0,0 @@ -using Ryujinx.Graphics.Texture.Utils; -using System; -using System.Runtime.InteropServices; - -namespace Ryujinx.Graphics.Texture -{ - static class BC6Decoder - { - private const int HalfOne = 0x3C00; - - public static void Decode(Span<byte> output, ReadOnlySpan<byte> data, int width, int height, bool signed) - { - ReadOnlySpan<Block> blocks = MemoryMarshal.Cast<byte, Block>(data); - - Span<ulong> output64 = MemoryMarshal.Cast<byte, ulong>(output); - - int wInBlocks = (width + 3) / 4; - int hInBlocks = (height + 3) / 4; - - for (int y = 0; y < hInBlocks; y++) - { - int y2 = y * 4; - int bh = Math.Min(4, height - y2); - - for (int x = 0; x < wInBlocks; x++) - { - int x2 = x * 4; - int bw = Math.Min(4, width - x2); - - DecodeBlock(blocks[y * wInBlocks + x], output64.Slice(y2 * width + x2), bw, bh, width, signed); - } - } - } - - private static void DecodeBlock(Block block, Span<ulong> output, int w, int h, int width, bool signed) - { - int mode = (int)(block.Low & 3); - if ((mode & 2) != 0) - { - mode = (int)(block.Low & 0x1f); - } - - Span<RgbaColor32> endPoints = stackalloc RgbaColor32[4]; - int subsetCount = DecodeEndPoints(ref block, endPoints, mode, signed); - if (subsetCount == 0) - { - // Mode is invalid, the spec mandates that hardware fills the block with - // a opaque black color. 
- for (int ty = 0; ty < h; ty++) - { - int baseOffs = ty * width; - - for (int tx = 0; tx < w; tx++) - { - output[baseOffs + tx] = (ulong)HalfOne << 48; - } - } - - return; - } - - int partition; - int indexBitCount; - ulong indices; - - if (subsetCount > 1) - { - partition = (int)((block.High >> 13) & 0x1F); - indexBitCount = 3; - - int fixUpIndex = BC67Tables.FixUpIndices[subsetCount - 1][partition][1] * 3; - ulong lowMask = (ulong.MaxValue >> (65 - fixUpIndex)) << 3; - ulong highMask = ulong.MaxValue << (fixUpIndex + 3); - - indices = ((block.High >> 16) & highMask) | ((block.High >> 17) & lowMask) | ((block.High >> 18) & 3); - } - else - { - partition = 0; - indexBitCount = 4; - indices = (block.High & ~0xFUL) | ((block.High >> 1) & 7); - } - - ulong indexMask = (1UL << indexBitCount) - 1; - - for (int ty = 0; ty < h; ty++) - { - int baseOffs = ty * width; - - for (int tx = 0; tx < w; tx++) - { - int offs = baseOffs + tx; - int index = (int)(indices & indexMask); - int endPointBase = BC67Tables.PartitionTable[subsetCount - 1][partition][ty * 4 + tx] << 1; - - RgbaColor32 color1 = endPoints[endPointBase]; - RgbaColor32 color2 = endPoints[endPointBase + 1]; - - RgbaColor32 color = BC67Utils.Interpolate(color1, color2, index, indexBitCount); - - output[offs] = - (ulong)FinishUnquantize(color.R, signed) | - ((ulong)FinishUnquantize(color.G, signed) << 16) | - ((ulong)FinishUnquantize(color.B, signed) << 32) | - ((ulong)HalfOne << 48); - - indices >>= indexBitCount; - } - } - } - - private static int DecodeEndPoints(ref Block block, Span<RgbaColor32> endPoints, int mode, bool signed) - { - ulong low = block.Low; - ulong high = block.High; - - int r0 = 0, g0 = 0, b0 = 0, r1 = 0, g1 = 0, b1 = 0, r2 = 0, g2 = 0, b2 = 0, r3 = 0, g3 = 0, b3 = 0; - int subsetCount; - - switch (mode) - { - case 0: - r0 = (int)(low >> 5) & 0x3FF; - g0 = (int)(low >> 15) & 0x3FF; - b0 = (int)(low >> 25) & 0x3FF; - - if (signed) - { - r0 = SignExtend(r0, 10); - g0 = SignExtend(g0, 10); - b0 = 
SignExtend(b0, 10); - } - - r1 = r0 + SignExtend((int)(low >> 35), 5); - g1 = g0 + SignExtend((int)(low >> 45), 5); - b1 = b0 + SignExtend((int)(low >> 55), 5); - - r2 = r0 + SignExtend((int)(high >> 1), 5); - g2 = g0 + SignExtend((int)(((low << 2) & 0x10) | ((low >> 41) & 0xF)), 5); - b2 = b0 + SignExtend((int)(((low << 1) & 0x10) | ((high << 3) & 0x08) | (low >> 61)), 5); - - r3 = r0 + SignExtend((int)(high >> 7), 5); - g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0xF)), 5); - b3 = b0 + SignExtend((int)( - ((low) & 0x10) | - ((high >> 9) & 0x08) | - ((high >> 4) & 0x04) | - ((low >> 59) & 0x02) | - ((low >> 50) & 0x01)), 5); - - r0 = Unquantize(r0, 10, signed); - g0 = Unquantize(g0, 10, signed); - b0 = Unquantize(b0, 10, signed); - - r1 = Unquantize(r1 & 0x3FF, 10, signed); - g1 = Unquantize(g1 & 0x3FF, 10, signed); - b1 = Unquantize(b1 & 0x3FF, 10, signed); - - r2 = Unquantize(r2 & 0x3FF, 10, signed); - g2 = Unquantize(g2 & 0x3FF, 10, signed); - b2 = Unquantize(b2 & 0x3FF, 10, signed); - - r3 = Unquantize(r3 & 0x3FF, 10, signed); - g3 = Unquantize(g3 & 0x3FF, 10, signed); - b3 = Unquantize(b3 & 0x3FF, 10, signed); - - subsetCount = 2; - break; - case 1: - r0 = (int)(low >> 5) & 0x7F; - g0 = (int)(low >> 15) & 0x7F; - b0 = (int)(low >> 25) & 0x7F; - - if (signed) - { - r0 = SignExtend(r0, 7); - g0 = SignExtend(g0, 7); - b0 = SignExtend(b0, 7); - } - - r1 = r0 + SignExtend((int)(low >> 35), 6); - g1 = g0 + SignExtend((int)(low >> 45), 6); - b1 = b0 + SignExtend((int)(low >> 55), 6); - - r2 = r0 + SignExtend((int)(high >> 1), 6); - g2 = g0 + SignExtend((int)(((low << 3) & 0x20) | ((low >> 20) & 0x10) | ((low >> 41) & 0x0F)), 6); - b2 = b0 + SignExtend((int)( - ((low >> 17) & 0x20) | - ((low >> 10) & 0x10) | - ((high << 3) & 0x08) | - (low >> 61)), 6); - - r3 = r0 + SignExtend((int)(high >> 7), 6); - g3 = g0 + SignExtend((int)(((low << 1) & 0x30) | ((low >> 51) & 0xF)), 6); - b3 = b0 + SignExtend((int)( - ((low >> 28) & 0x20) | - ((low >> 30) & 
0x10) | - ((low >> 29) & 0x08) | - ((low >> 21) & 0x04) | - ((low >> 12) & 0x03)), 6); - - r0 = Unquantize(r0, 7, signed); - g0 = Unquantize(g0, 7, signed); - b0 = Unquantize(b0, 7, signed); - - r1 = Unquantize(r1 & 0x7F, 7, signed); - g1 = Unquantize(g1 & 0x7F, 7, signed); - b1 = Unquantize(b1 & 0x7F, 7, signed); - - r2 = Unquantize(r2 & 0x7F, 7, signed); - g2 = Unquantize(g2 & 0x7F, 7, signed); - b2 = Unquantize(b2 & 0x7F, 7, signed); - - r3 = Unquantize(r3 & 0x7F, 7, signed); - g3 = Unquantize(g3 & 0x7F, 7, signed); - b3 = Unquantize(b3 & 0x7F, 7, signed); - - subsetCount = 2; - break; - case 2: - r0 = (int)(((low >> 30) & 0x400) | ((low >> 5) & 0x3FF)); - g0 = (int)(((low >> 39) & 0x400) | ((low >> 15) & 0x3FF)); - b0 = (int)(((low >> 49) & 0x400) | ((low >> 25) & 0x3FF)); - - if (signed) - { - r0 = SignExtend(r0, 11); - g0 = SignExtend(g0, 11); - b0 = SignExtend(b0, 11); - } - - r1 = r0 + SignExtend((int)(low >> 35), 5); - g1 = g0 + SignExtend((int)(low >> 45), 4); - b1 = b0 + SignExtend((int)(low >> 55), 4); - - r2 = r0 + SignExtend((int)(high >> 1), 5); - g2 = g0 + SignExtend((int)(low >> 41), 4); - b2 = b0 + SignExtend((int)(((high << 3) & 8) | (low >> 61)), 4); - - r3 = r0 + SignExtend((int)(high >> 7), 5); - g3 = g0 + SignExtend((int)(low >> 51), 4); - b3 = b0 + SignExtend((int)( - ((high >> 9) & 8) | - ((high >> 4) & 4) | - ((low >> 59) & 2) | - ((low >> 50) & 1)), 4); - - r0 = Unquantize(r0, 11, signed); - g0 = Unquantize(g0, 11, signed); - b0 = Unquantize(b0, 11, signed); - - r1 = Unquantize(r1 & 0x7FF, 11, signed); - g1 = Unquantize(g1 & 0x7FF, 11, signed); - b1 = Unquantize(b1 & 0x7FF, 11, signed); - - r2 = Unquantize(r2 & 0x7FF, 11, signed); - g2 = Unquantize(g2 & 0x7FF, 11, signed); - b2 = Unquantize(b2 & 0x7FF, 11, signed); - - r3 = Unquantize(r3 & 0x7FF, 11, signed); - g3 = Unquantize(g3 & 0x7FF, 11, signed); - b3 = Unquantize(b3 & 0x7FF, 11, signed); - - subsetCount = 2; - break; - case 3: - r0 = (int)(low >> 5) & 0x3FF; - g0 = (int)(low >> 15) 
& 0x3FF; - b0 = (int)(low >> 25) & 0x3FF; - - r1 = (int)(low >> 35) & 0x3FF; - g1 = (int)(low >> 45) & 0x3FF; - b1 = (int)(((high << 9) & 0x200) | (low >> 55)); - - if (signed) - { - r0 = SignExtend(r0, 10); - g0 = SignExtend(g0, 10); - b0 = SignExtend(b0, 10); - - r1 = SignExtend(r1, 10); - g1 = SignExtend(g1, 10); - b1 = SignExtend(b1, 10); - } - - r0 = Unquantize(r0, 10, signed); - g0 = Unquantize(g0, 10, signed); - b0 = Unquantize(b0, 10, signed); - - r1 = Unquantize(r1, 10, signed); - g1 = Unquantize(g1, 10, signed); - b1 = Unquantize(b1, 10, signed); - - subsetCount = 1; - break; - case 6: - r0 = (int)(((low >> 29) & 0x400) | ((low >> 5) & 0x3FF)); - g0 = (int)(((low >> 40) & 0x400) | ((low >> 15) & 0x3FF)); - b0 = (int)(((low >> 49) & 0x400) | ((low >> 25) & 0x3FF)); - - if (signed) - { - r0 = SignExtend(r0, 11); - g0 = SignExtend(g0, 11); - b0 = SignExtend(b0, 11); - } - - r1 = r0 + SignExtend((int)(low >> 35), 4); - g1 = g0 + SignExtend((int)(low >> 45), 5); - b1 = b0 + SignExtend((int)(low >> 55), 4); - - r2 = r0 + SignExtend((int)(high >> 1), 4); - g2 = g0 + SignExtend((int)(((high >> 7) & 0x10) | ((low >> 41) & 0x0F)), 5); - b2 = b0 + SignExtend((int)(((high << 3) & 0x08) | ((low >> 61))), 4); - - r3 = r0 + SignExtend((int)(high >> 7), 4); - g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0x0F)), 5); - b3 = b0 + SignExtend((int)( - ((high >> 9) & 8) | - ((high >> 4) & 4) | - ((low >> 59) & 2) | - ((high >> 5) & 1)), 4); - - r0 = Unquantize(r0, 11, signed); - g0 = Unquantize(g0, 11, signed); - b0 = Unquantize(b0, 11, signed); - - r1 = Unquantize(r1 & 0x7FF, 11, signed); - g1 = Unquantize(g1 & 0x7FF, 11, signed); - b1 = Unquantize(b1 & 0x7FF, 11, signed); - - r2 = Unquantize(r2 & 0x7FF, 11, signed); - g2 = Unquantize(g2 & 0x7FF, 11, signed); - b2 = Unquantize(b2 & 0x7FF, 11, signed); - - r3 = Unquantize(r3 & 0x7FF, 11, signed); - g3 = Unquantize(g3 & 0x7FF, 11, signed); - b3 = Unquantize(b3 & 0x7FF, 11, signed); - - subsetCount = 2; - 
break; - case 7: - r0 = (int)(((low >> 34) & 0x400) | ((low >> 5) & 0x3FF)); - g0 = (int)(((low >> 44) & 0x400) | ((low >> 15) & 0x3FF)); - b0 = (int)(((high << 10) & 0x400) | ((low >> 25) & 0x3FF)); - - if (signed) - { - r0 = SignExtend(r0, 11); - g0 = SignExtend(g0, 11); - b0 = SignExtend(b0, 11); - } - - r1 = (r0 + SignExtend((int)(low >> 35), 9)) & 0x7FF; - g1 = (g0 + SignExtend((int)(low >> 45), 9)) & 0x7FF; - b1 = (b0 + SignExtend((int)(low >> 55), 9)) & 0x7FF; - - r0 = Unquantize(r0, 11, signed); - g0 = Unquantize(g0, 11, signed); - b0 = Unquantize(b0, 11, signed); - - r1 = Unquantize(r1, 11, signed); - g1 = Unquantize(g1, 11, signed); - b1 = Unquantize(b1, 11, signed); - - subsetCount = 1; - break; - case 10: - r0 = (int)(((low >> 29) & 0x400) | ((low >> 5) & 0x3FF)); - g0 = (int)(((low >> 39) & 0x400) | ((low >> 15) & 0x3FF)); - b0 = (int)(((low >> 50) & 0x400) | ((low >> 25) & 0x3FF)); - - if (signed) - { - r0 = SignExtend(r0, 11); - g0 = SignExtend(g0, 11); - b0 = SignExtend(b0, 11); - } - - r1 = r0 + SignExtend((int)(low >> 35), 4); - g1 = g0 + SignExtend((int)(low >> 45), 4); - b1 = b0 + SignExtend((int)(low >> 55), 5); - - r2 = r0 + SignExtend((int)(high >> 1), 4); - g2 = g0 + SignExtend((int)(low >> 41), 4); - b2 = b0 + SignExtend((int)(((low >> 36) & 0x10) | ((high << 3) & 8) | (low >> 61)), 5); - - r3 = r0 + SignExtend((int)(high >> 7), 4); - g3 = g0 + SignExtend((int)(low >> 51), 4); - b3 = b0 + SignExtend((int)( - ((high >> 7) & 0x10) | - ((high >> 9) & 0x08) | - ((high >> 4) & 0x06) | - ((low >> 50) & 0x01)), 5); - - r0 = Unquantize(r0, 11, signed); - g0 = Unquantize(g0, 11, signed); - b0 = Unquantize(b0, 11, signed); - - r1 = Unquantize(r1 & 0x7FF, 11, signed); - g1 = Unquantize(g1 & 0x7FF, 11, signed); - b1 = Unquantize(b1 & 0x7FF, 11, signed); - - r2 = Unquantize(r2 & 0x7FF, 11, signed); - g2 = Unquantize(g2 & 0x7FF, 11, signed); - b2 = Unquantize(b2 & 0x7FF, 11, signed); - - r3 = Unquantize(r3 & 0x7FF, 11, signed); - g3 = Unquantize(g3 & 
0x7FF, 11, signed); - b3 = Unquantize(b3 & 0x7FF, 11, signed); - - subsetCount = 2; - break; - case 11: - r0 = (int)(((low >> 32) & 0x800) | ((low >> 34) & 0x400) | ((low >> 5) & 0x3FF)); - g0 = (int)(((low >> 42) & 0x800) | ((low >> 44) & 0x400) | ((low >> 15) & 0x3FF)); - b0 = (int)(((low >> 52) & 0x800) | ((high << 10) & 0x400) | ((low >> 25) & 0x3FF)); - - if (signed) - { - r0 = SignExtend(r0, 12); - g0 = SignExtend(g0, 12); - b0 = SignExtend(b0, 12); - } - - r1 = (r0 + SignExtend((int)(low >> 35), 8)) & 0xFFF; - g1 = (g0 + SignExtend((int)(low >> 45), 8)) & 0xFFF; - b1 = (b0 + SignExtend((int)(low >> 55), 8)) & 0xFFF; - - r0 = Unquantize(r0, 12, signed); - g0 = Unquantize(g0, 12, signed); - b0 = Unquantize(b0, 12, signed); - - r1 = Unquantize(r1, 12, signed); - g1 = Unquantize(g1, 12, signed); - b1 = Unquantize(b1, 12, signed); - - subsetCount = 1; - break; - case 14: - r0 = (int)(low >> 5) & 0x1FF; - g0 = (int)(low >> 15) & 0x1FF; - b0 = (int)(low >> 25) & 0x1FF; - - if (signed) - { - r0 = SignExtend(r0, 9); - g0 = SignExtend(g0, 9); - b0 = SignExtend(b0, 9); - } - - r1 = r0 + SignExtend((int)(low >> 35), 5); - g1 = g0 + SignExtend((int)(low >> 45), 5); - b1 = b0 + SignExtend((int)(low >> 55), 5); - - r2 = r0 + SignExtend((int)(high >> 1), 5); - g2 = g0 + SignExtend((int)(((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 5); - b2 = b0 + SignExtend((int)(((low >> 10) & 0x10) | ((high << 3) & 8) | (low >> 61)), 5); - - r3 = r0 + SignExtend((int)(high >> 7), 5); - g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0xF)), 5); - b3 = b0 + SignExtend((int)( - ((low >> 30) & 0x10) | - ((high >> 9) & 0x08) | - ((high >> 4) & 0x04) | - ((low >> 59) & 0x02) | - ((low >> 50) & 0x01)), 5); - - r0 = Unquantize(r0, 9, signed); - g0 = Unquantize(g0, 9, signed); - b0 = Unquantize(b0, 9, signed); - - r1 = Unquantize(r1 & 0x1FF, 9, signed); - g1 = Unquantize(g1 & 0x1FF, 9, signed); - b1 = Unquantize(b1 & 0x1FF, 9, signed); - - r2 = Unquantize(r2 & 0x1FF, 9, signed); - 
g2 = Unquantize(g2 & 0x1FF, 9, signed); - b2 = Unquantize(b2 & 0x1FF, 9, signed); - - r3 = Unquantize(r3 & 0x1FF, 9, signed); - g3 = Unquantize(g3 & 0x1FF, 9, signed); - b3 = Unquantize(b3 & 0x1FF, 9, signed); - - subsetCount = 2; - break; - case 15: - r0 = (BitReverse6((int)(low >> 39) & 0x3F) << 10) | ((int)(low >> 5) & 0x3FF); - g0 = (BitReverse6((int)(low >> 49) & 0x3F) << 10) | ((int)(low >> 15) & 0x3FF); - b0 = ((BitReverse6((int)(low >> 59)) | (int)(high & 1)) << 10) | ((int)(low >> 25) & 0x3FF); - - if (signed) - { - r0 = SignExtend(r0, 16); - g0 = SignExtend(g0, 16); - b0 = SignExtend(b0, 16); - } - - r1 = (r0 + SignExtend((int)(low >> 35), 4)) & 0xFFFF; - g1 = (g0 + SignExtend((int)(low >> 45), 4)) & 0xFFFF; - b1 = (b0 + SignExtend((int)(low >> 55), 4)) & 0xFFFF; - - subsetCount = 1; - break; - case 18: - r0 = (int)(low >> 5) & 0xFF; - g0 = (int)(low >> 15) & 0xFF; - b0 = (int)(low >> 25) & 0xFF; - - if (signed) - { - r0 = SignExtend(r0, 8); - g0 = SignExtend(g0, 8); - b0 = SignExtend(b0, 8); - } - - r1 = r0 + SignExtend((int)(low >> 35), 6); - g1 = g0 + SignExtend((int)(low >> 45), 5); - b1 = b0 + SignExtend((int)(low >> 55), 5); - - r2 = r0 + SignExtend((int)(high >> 1), 6); - g2 = g0 + SignExtend((int)(((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 5); - b2 = b0 + SignExtend((int)(((low >> 10) & 0x10) | ((high << 3) & 8) | (low >> 61)), 5); - - r3 = r0 + SignExtend((int)(high >> 7), 6); - g3 = g0 + SignExtend((int)(((low >> 9) & 0x10) | ((low >> 51) & 0xF)), 5); - b3 = b0 + SignExtend((int)( - ((low >> 30) & 0x18) | - ((low >> 21) & 0x04) | - ((low >> 59) & 0x02) | - ((low >> 50) & 0x01)), 5); - - r0 = Unquantize(r0, 8, signed); - g0 = Unquantize(g0, 8, signed); - b0 = Unquantize(b0, 8, signed); - - r1 = Unquantize(r1 & 0xFF, 8, signed); - g1 = Unquantize(g1 & 0xFF, 8, signed); - b1 = Unquantize(b1 & 0xFF, 8, signed); - - r2 = Unquantize(r2 & 0xFF, 8, signed); - g2 = Unquantize(g2 & 0xFF, 8, signed); - b2 = Unquantize(b2 & 0xFF, 8, signed); - - r3 = 
Unquantize(r3 & 0xFF, 8, signed); - g3 = Unquantize(g3 & 0xFF, 8, signed); - b3 = Unquantize(b3 & 0xFF, 8, signed); - - subsetCount = 2; - break; - case 22: - r0 = (int)(low >> 5) & 0xFF; - g0 = (int)(low >> 15) & 0xFF; - b0 = (int)(low >> 25) & 0xFF; - - if (signed) - { - r0 = SignExtend(r0, 8); - g0 = SignExtend(g0, 8); - b0 = SignExtend(b0, 8); - } - - r1 = r0 + SignExtend((int)(low >> 35), 5); - g1 = g0 + SignExtend((int)(low >> 45), 6); - b1 = b0 + SignExtend((int)(low >> 55), 5); - - r2 = r0 + SignExtend((int)(high >> 1), 5); - g2 = g0 + SignExtend((int)(((low >> 18) & 0x20) | ((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 6); - b2 = b0 + SignExtend((int)(((low >> 10) & 0x10) | ((high << 3) & 0x08) | (low >> 61)), 5); - - r3 = r0 + SignExtend((int)(high >> 7), 5); - g3 = g0 + SignExtend((int)(((low >> 28) & 0x20) | ((low >> 36) & 0x10) | ((low >> 51) & 0x0F)), 6); - b3 = b0 + SignExtend((int)( - ((low >> 30) & 0x10) | - ((high >> 9) & 0x08) | - ((high >> 4) & 0x04) | - ((low >> 59) & 0x02) | - ((low >> 13) & 0x01)), 5); - - r0 = Unquantize(r0, 8, signed); - g0 = Unquantize(g0, 8, signed); - b0 = Unquantize(b0, 8, signed); - - r1 = Unquantize(r1 & 0xFF, 8, signed); - g1 = Unquantize(g1 & 0xFF, 8, signed); - b1 = Unquantize(b1 & 0xFF, 8, signed); - - r2 = Unquantize(r2 & 0xFF, 8, signed); - g2 = Unquantize(g2 & 0xFF, 8, signed); - b2 = Unquantize(b2 & 0xFF, 8, signed); - - r3 = Unquantize(r3 & 0xFF, 8, signed); - g3 = Unquantize(g3 & 0xFF, 8, signed); - b3 = Unquantize(b3 & 0xFF, 8, signed); - - subsetCount = 2; - break; - case 26: - r0 = (int)(low >> 5) & 0xFF; - g0 = (int)(low >> 15) & 0xFF; - b0 = (int)(low >> 25) & 0xFF; - - if (signed) - { - r0 = SignExtend(r0, 8); - g0 = SignExtend(g0, 8); - b0 = SignExtend(b0, 8); - } - - r1 = r0 + SignExtend((int)(low >> 35), 5); - g1 = g0 + SignExtend((int)(low >> 45), 5); - b1 = b0 + SignExtend((int)(low >> 55), 6); - - r2 = r0 + SignExtend((int)(high >> 1), 5); - g2 = g0 + SignExtend((int)(((low >> 20) & 0x10) | ((low 
>> 41) & 0xF)), 5); - b2 = b0 + SignExtend((int)( - ((low >> 18) & 0x20) | - ((low >> 10) & 0x10) | - ((high << 3) & 0x08) | - (low >> 61)), 6); - - r3 = r0 + SignExtend((int)(high >> 7), 5); - g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0xF)), 5); - b3 = b0 + SignExtend((int)( - ((low >> 28) & 0x20) | - ((low >> 30) & 0x10) | - ((high >> 9) & 0x08) | - ((high >> 4) & 0x04) | - ((low >> 12) & 0x02) | - ((low >> 50) & 0x01)), 6); - - r0 = Unquantize(r0, 8, signed); - g0 = Unquantize(g0, 8, signed); - b0 = Unquantize(b0, 8, signed); - - r1 = Unquantize(r1 & 0xFF, 8, signed); - g1 = Unquantize(g1 & 0xFF, 8, signed); - b1 = Unquantize(b1 & 0xFF, 8, signed); - - r2 = Unquantize(r2 & 0xFF, 8, signed); - g2 = Unquantize(g2 & 0xFF, 8, signed); - b2 = Unquantize(b2 & 0xFF, 8, signed); - - r3 = Unquantize(r3 & 0xFF, 8, signed); - g3 = Unquantize(g3 & 0xFF, 8, signed); - b3 = Unquantize(b3 & 0xFF, 8, signed); - - subsetCount = 2; - break; - case 30: - r0 = (int)(low >> 5) & 0x3F; - g0 = (int)(low >> 15) & 0x3F; - b0 = (int)(low >> 25) & 0x3F; - - r1 = (int)(low >> 35) & 0x3F; - g1 = (int)(low >> 45) & 0x3F; - b1 = (int)(low >> 55) & 0x3F; - - r2 = (int)(high >> 1) & 0x3F; - g2 = (int)(((low >> 16) & 0x20) | ((low >> 20) & 0x10) | ((low >> 41) & 0xF)); - b2 = (int)(((low >> 17) & 0x20) | ((low >> 10) & 0x10) | ((high << 3) & 0x08) | (low >> 61)); - - r3 = (int)(high >> 7) & 0x3F; - g3 = (int)(((low >> 26) & 0x20) | ((low >> 7) & 0x10) | ((low >> 51) & 0xF)); - b3 = (int)( - ((low >> 28) & 0x20) | - ((low >> 30) & 0x10) | - ((low >> 29) & 0x08) | - ((low >> 21) & 0x04) | - ((low >> 12) & 0x03)); - - if (signed) - { - r0 = SignExtend(r0, 6); - g0 = SignExtend(g0, 6); - b0 = SignExtend(b0, 6); - - r1 = SignExtend(r1, 6); - g1 = SignExtend(g1, 6); - b1 = SignExtend(b1, 6); - - r2 = SignExtend(r2, 6); - g2 = SignExtend(g2, 6); - b2 = SignExtend(b2, 6); - - r3 = SignExtend(r3, 6); - g3 = SignExtend(g3, 6); - b3 = SignExtend(b3, 6); - } - - r0 = Unquantize(r0, 6, 
signed); - g0 = Unquantize(g0, 6, signed); - b0 = Unquantize(b0, 6, signed); - - r1 = Unquantize(r1, 6, signed); - g1 = Unquantize(g1, 6, signed); - b1 = Unquantize(b1, 6, signed); - - r2 = Unquantize(r2, 6, signed); - g2 = Unquantize(g2, 6, signed); - b2 = Unquantize(b2, 6, signed); - - r3 = Unquantize(r3, 6, signed); - g3 = Unquantize(g3, 6, signed); - b3 = Unquantize(b3, 6, signed); - - subsetCount = 2; - break; - default: - subsetCount = 0; - break; - } - - if (subsetCount > 0) - { - endPoints[0] = new RgbaColor32(r0, g0, b0, HalfOne); - endPoints[1] = new RgbaColor32(r1, g1, b1, HalfOne); - - if (subsetCount > 1) - { - endPoints[2] = new RgbaColor32(r2, g2, b2, HalfOne); - endPoints[3] = new RgbaColor32(r3, g3, b3, HalfOne); - } - } - - return subsetCount; - } - - private static int SignExtend(int value, int bits) - { - int shift = 32 - bits; - return (value << shift) >> shift; - } - - private static int Unquantize(int value, int bits, bool signed) - { - if (signed) - { - if (bits >= 16) - { - return value; - } - else - { - bool sign = value < 0; - - if (sign) - { - value = -value; - } - - if (value == 0) - { - return value; - } - else if (value >= ((1 << (bits - 1)) - 1)) - { - value = 0x7FFF; - } - else - { - value = ((value << 15) + 0x4000) >> (bits - 1); - } - - if (sign) - { - value = -value; - } - } - } - else - { - if (bits >= 15 || value == 0) - { - return value; - } - else if (value == ((1 << bits) - 1)) - { - return 0xFFFF; - } - else - { - return ((value << 16) + 0x8000) >> bits; - } - } - - return value; - } - - private static ushort FinishUnquantize(int value, bool signed) - { - if (signed) - { - value = value < 0 ? 
-((-value * 31) >> 5) : (value * 31) >> 5; - - int sign = 0; - if (value < 0) - { - sign = 0x8000; - value = -value; - } - - return (ushort)(sign | value); - } - else - { - return (ushort)((value * 31) >> 6); - } - } - - private static int BitReverse6(int value) - { - value = ((value >> 1) & 0x55) | ((value << 1) & 0xaa); - value = ((value >> 2) & 0x33) | ((value << 2) & 0xcc); - value = ((value >> 4) & 0x0f) | ((value << 4) & 0xf0); - return value >> 2; - } - } -} diff --git a/Ryujinx.Graphics.Texture/BC7Decoder.cs b/Ryujinx.Graphics.Texture/BC7Decoder.cs deleted file mode 100644 index b865a559..00000000 --- a/Ryujinx.Graphics.Texture/BC7Decoder.cs +++ /dev/null @@ -1,220 +0,0 @@ -using Ryujinx.Graphics.Texture.Utils; -using System; -using System.Diagnostics; -using System.Numerics; -using System.Runtime.InteropServices; - -namespace Ryujinx.Graphics.Texture -{ - static class BC7Decoder - { - public static void Decode(Span<byte> output, ReadOnlySpan<byte> data, int width, int height) - { - ReadOnlySpan<Block> blocks = MemoryMarshal.Cast<byte, Block>(data); - - Span<uint> output32 = MemoryMarshal.Cast<byte, uint>(output); - - int wInBlocks = (width + 3) / 4; - int hInBlocks = (height + 3) / 4; - - for (int y = 0; y < hInBlocks; y++) - { - int y2 = y * 4; - int bh = Math.Min(4, height - y2); - - for (int x = 0; x < wInBlocks; x++) - { - int x2 = x * 4; - int bw = Math.Min(4, width - x2); - - DecodeBlock(blocks[y * wInBlocks + x], output32.Slice(y2 * width + x2), bw, bh, width); - } - } - } - - private static void DecodeBlock(Block block, Span<uint> output, int w, int h, int width) - { - int mode = BitOperations.TrailingZeroCount((byte)block.Low | 0x100); - if (mode == 8) - { - // Mode is invalid, the spec mandates that hardware fills the block with - // a transparent black color. 
- for (int ty = 0; ty < h; ty++) - { - int baseOffs = ty * width; - - for (int tx = 0; tx < w; tx++) - { - int offs = baseOffs + tx; - - output[offs] = 0; - } - } - - return; - } - - BC7ModeInfo modeInfo = BC67Tables.BC7ModeInfos[mode]; - - int offset = mode + 1; - int partition = (int)block.Decode(ref offset, modeInfo.PartitionBitCount); - int rotation = (int)block.Decode(ref offset, modeInfo.RotationBitCount); - int indexMode = (int)block.Decode(ref offset, modeInfo.IndexModeBitCount); - - Debug.Assert(partition < 64); - Debug.Assert(rotation < 4); - Debug.Assert(indexMode < 2); - - int endPointCount = modeInfo.SubsetCount * 2; - - Span<RgbaColor32> endPoints = stackalloc RgbaColor32[endPointCount]; - Span<byte> pValues = stackalloc byte[modeInfo.PBits]; - - endPoints.Fill(new RgbaColor32(0, 0, 0, 255)); - - for (int i = 0; i < endPointCount; i++) - { - endPoints[i].R = (int)block.Decode(ref offset, modeInfo.ColorDepth); - } - - for (int i = 0; i < endPointCount; i++) - { - endPoints[i].G = (int)block.Decode(ref offset, modeInfo.ColorDepth); - } - - for (int i = 0; i < endPointCount; i++) - { - endPoints[i].B = (int)block.Decode(ref offset, modeInfo.ColorDepth); - } - - if (modeInfo.AlphaDepth != 0) - { - for (int i = 0; i < endPointCount; i++) - { - endPoints[i].A = (int)block.Decode(ref offset, modeInfo.AlphaDepth); - } - } - - for (int i = 0; i < modeInfo.PBits; i++) - { - pValues[i] = (byte)block.Decode(ref offset, 1); - } - - for (int i = 0; i < endPointCount; i++) - { - int pBit = -1; - - if (modeInfo.PBits != 0) - { - int pIndex = (i * modeInfo.PBits) / endPointCount; - pBit = pValues[pIndex]; - } - - Unquantize(ref endPoints[i], modeInfo.ColorDepth, modeInfo.AlphaDepth, pBit); - } - - byte[] partitionTable = BC67Tables.PartitionTable[modeInfo.SubsetCount - 1][partition]; - byte[] fixUpTable = BC67Tables.FixUpIndices[modeInfo.SubsetCount - 1][partition]; - - Span<byte> colorIndices = stackalloc byte[16]; - - for (int i = 0; i < 16; i++) - { - byte subset = 
partitionTable[i]; - int bitCount = i == fixUpTable[subset] ? modeInfo.ColorIndexBitCount - 1 : modeInfo.ColorIndexBitCount; - - colorIndices[i] = (byte)block.Decode(ref offset, bitCount); - Debug.Assert(colorIndices[i] < 16); - } - - Span<byte> alphaIndices = stackalloc byte[16]; - - if (modeInfo.AlphaIndexBitCount != 0) - { - for (int i = 0; i < 16; i++) - { - int bitCount = i != 0 ? modeInfo.AlphaIndexBitCount : modeInfo.AlphaIndexBitCount - 1; - - alphaIndices[i] = (byte)block.Decode(ref offset, bitCount); - Debug.Assert(alphaIndices[i] < 16); - } - } - - for (int ty = 0; ty < h; ty++) - { - int baseOffs = ty * width; - - for (int tx = 0; tx < w; tx++) - { - int i = ty * 4 + tx; - - RgbaColor32 color; - - byte subset = partitionTable[i]; - - RgbaColor32 color1 = endPoints[subset * 2]; - RgbaColor32 color2 = endPoints[subset * 2 + 1]; - - if (modeInfo.AlphaIndexBitCount != 0) - { - if (indexMode == 0) - { - color = BC67Utils.Interpolate(color1, color2, colorIndices[i], alphaIndices[i], modeInfo.ColorIndexBitCount, modeInfo.AlphaIndexBitCount); - } - else - { - color = BC67Utils.Interpolate(color1, color2, alphaIndices[i], colorIndices[i], modeInfo.AlphaIndexBitCount, modeInfo.ColorIndexBitCount); - } - } - else - { - color = BC67Utils.Interpolate(color1, color2, colorIndices[i], colorIndices[i], modeInfo.ColorIndexBitCount, modeInfo.ColorIndexBitCount); - } - - if (rotation != 0) - { - int a = color.A; - - switch (rotation) - { - case 1: color.A = color.R; color.R = a; break; - case 2: color.A = color.G; color.G = a; break; - case 3: color.A = color.B; color.B = a; break; - } - } - - RgbaColor8 color8 = color.GetColor8(); - - output[baseOffs + tx] = color8.ToUInt32(); - } - } - } - - private static void Unquantize(ref RgbaColor32 color, int colorDepth, int alphaDepth, int pBit) - { - color.R = UnquantizeComponent(color.R, colorDepth, pBit); - color.G = UnquantizeComponent(color.G, colorDepth, pBit); - color.B = UnquantizeComponent(color.B, colorDepth, pBit); - 
color.A = alphaDepth != 0 ? UnquantizeComponent(color.A, alphaDepth, pBit) : 255; - } - - private static int UnquantizeComponent(int component, int bits, int pBit) - { - int shift = 8 - bits; - int value = component << shift; - - if (pBit >= 0) - { - Debug.Assert(pBit <= 1); - value |= value >> (bits + 1); - value |= pBit << (shift - 1); - } - else - { - value |= value >> bits; - } - - return value; - } - } -} diff --git a/Ryujinx.Graphics.Texture/BCnDecoder.cs b/Ryujinx.Graphics.Texture/BCnDecoder.cs deleted file mode 100644 index b21fa4d1..00000000 --- a/Ryujinx.Graphics.Texture/BCnDecoder.cs +++ /dev/null @@ -1,894 +0,0 @@ -using Ryujinx.Common; -using System; -using System.Buffers.Binary; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; - -namespace Ryujinx.Graphics.Texture -{ - public static class BCnDecoder - { - private const int BlockWidth = 4; - private const int BlockHeight = 4; - - public static byte[] DecodeBC1(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers) - { - int size = 0; - - for (int l = 0; l < levels; l++) - { - size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4; - } - - byte[] output = new byte[size]; - - Span<byte> tile = stackalloc byte[BlockWidth * BlockHeight * 4]; - - Span<uint> tileAsUint = MemoryMarshal.Cast<byte, uint>(tile); - Span<uint> outputAsUint = MemoryMarshal.Cast<byte, uint>(output); - - Span<Vector128<byte>> tileAsVector128 = MemoryMarshal.Cast<byte, Vector128<byte>>(tile); - - Span<Vector128<byte>> outputLine0 = default; - Span<Vector128<byte>> outputLine1 = default; - Span<Vector128<byte>> outputLine2 = default; - Span<Vector128<byte>> outputLine3 = default; - - int imageBaseOOffs = 0; - - for (int l = 0; l < levels; l++) - { - int w = BitUtils.DivRoundUp(width, BlockWidth); - int h = BitUtils.DivRoundUp(height, BlockHeight); - - for (int l2 = 0; l2 < layers; l2++) - { - for (int z = 
0; z < depth; z++) - { - for (int y = 0; y < h; y++) - { - int baseY = y * BlockHeight; - int copyHeight = Math.Min(BlockHeight, height - baseY); - int lineBaseOOffs = imageBaseOOffs + baseY * width; - - if (copyHeight == 4) - { - outputLine0 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs)); - outputLine1 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width)); - outputLine2 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 2)); - outputLine3 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 3)); - } - - for (int x = 0; x < w; x++) - { - int baseX = x * BlockWidth; - int copyWidth = Math.Min(BlockWidth, width - baseX); - - BC1DecodeTileRgb(tile, data); - - if ((copyWidth | copyHeight) == 4) - { - outputLine0[x] = tileAsVector128[0]; - outputLine1[x] = tileAsVector128[1]; - outputLine2[x] = tileAsVector128[2]; - outputLine3[x] = tileAsVector128[3]; - } - else - { - int pixelBaseOOffs = lineBaseOOffs + baseX; - - for (int tY = 0; tY < copyHeight; tY++) - { - tileAsUint.Slice(tY * 4, copyWidth).CopyTo(outputAsUint.Slice(pixelBaseOOffs + width * tY, copyWidth)); - } - } - - data = data.Slice(8); - } - } - - imageBaseOOffs += width * height; - } - } - - width = Math.Max(1, width >> 1); - height = Math.Max(1, height >> 1); - depth = Math.Max(1, depth >> 1); - } - - return output; - } - - public static byte[] DecodeBC2(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers) - { - int size = 0; - - for (int l = 0; l < levels; l++) - { - size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4; - } - - byte[] output = new byte[size]; - - Span<byte> tile = stackalloc byte[BlockWidth * BlockHeight * 4]; - - Span<uint> tileAsUint = MemoryMarshal.Cast<byte, uint>(tile); - Span<uint> outputAsUint = MemoryMarshal.Cast<byte, uint>(output); - - Span<Vector128<byte>> tileAsVector128 
= MemoryMarshal.Cast<byte, Vector128<byte>>(tile); - - Span<Vector128<byte>> outputLine0 = default; - Span<Vector128<byte>> outputLine1 = default; - Span<Vector128<byte>> outputLine2 = default; - Span<Vector128<byte>> outputLine3 = default; - - int imageBaseOOffs = 0; - - for (int l = 0; l < levels; l++) - { - int w = BitUtils.DivRoundUp(width, BlockWidth); - int h = BitUtils.DivRoundUp(height, BlockHeight); - - for (int l2 = 0; l2 < layers; l2++) - { - for (int z = 0; z < depth; z++) - { - for (int y = 0; y < h; y++) - { - int baseY = y * BlockHeight; - int copyHeight = Math.Min(BlockHeight, height - baseY); - int lineBaseOOffs = imageBaseOOffs + baseY * width; - - if (copyHeight == 4) - { - outputLine0 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs)); - outputLine1 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width)); - outputLine2 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 2)); - outputLine3 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 3)); - } - - for (int x = 0; x < w; x++) - { - int baseX = x * BlockWidth; - int copyWidth = Math.Min(BlockWidth, width - baseX); - - BC23DecodeTileRgb(tile, data.Slice(8)); - - ulong block = BinaryPrimitives.ReadUInt64LittleEndian(data); - - for (int i = 3; i < BlockWidth * BlockHeight * 4; i += 4, block >>= 4) - { - tile[i] = (byte)((block & 0xf) | (block << 4)); - } - - if ((copyWidth | copyHeight) == 4) - { - outputLine0[x] = tileAsVector128[0]; - outputLine1[x] = tileAsVector128[1]; - outputLine2[x] = tileAsVector128[2]; - outputLine3[x] = tileAsVector128[3]; - } - else - { - int pixelBaseOOffs = lineBaseOOffs + baseX; - - for (int tY = 0; tY < copyHeight; tY++) - { - tileAsUint.Slice(tY * 4, copyWidth).CopyTo(outputAsUint.Slice(pixelBaseOOffs + width * tY, copyWidth)); - } - } - - data = data.Slice(16); - } - } - - imageBaseOOffs += width * height; - } - } - - width = 
Math.Max(1, width >> 1); - height = Math.Max(1, height >> 1); - depth = Math.Max(1, depth >> 1); - } - - return output; - } - - public static byte[] DecodeBC3(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers) - { - int size = 0; - - for (int l = 0; l < levels; l++) - { - size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4; - } - - byte[] output = new byte[size]; - - Span<byte> tile = stackalloc byte[BlockWidth * BlockHeight * 4]; - Span<byte> rPal = stackalloc byte[8]; - - Span<uint> tileAsUint = MemoryMarshal.Cast<byte, uint>(tile); - Span<uint> outputAsUint = MemoryMarshal.Cast<byte, uint>(output); - - Span<Vector128<byte>> tileAsVector128 = MemoryMarshal.Cast<byte, Vector128<byte>>(tile); - - Span<Vector128<byte>> outputLine0 = default; - Span<Vector128<byte>> outputLine1 = default; - Span<Vector128<byte>> outputLine2 = default; - Span<Vector128<byte>> outputLine3 = default; - - int imageBaseOOffs = 0; - - for (int l = 0; l < levels; l++) - { - int w = BitUtils.DivRoundUp(width, BlockWidth); - int h = BitUtils.DivRoundUp(height, BlockHeight); - - for (int l2 = 0; l2 < layers; l2++) - { - for (int z = 0; z < depth; z++) - { - for (int y = 0; y < h; y++) - { - int baseY = y * BlockHeight; - int copyHeight = Math.Min(BlockHeight, height - baseY); - int lineBaseOOffs = imageBaseOOffs + baseY * width; - - if (copyHeight == 4) - { - outputLine0 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs)); - outputLine1 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width)); - outputLine2 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 2)); - outputLine3 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 3)); - } - - for (int x = 0; x < w; x++) - { - int baseX = x * BlockWidth; - int copyWidth = Math.Min(BlockWidth, width - baseX); - - BC23DecodeTileRgb(tile, 
data.Slice(8)); - - ulong block = BinaryPrimitives.ReadUInt64LittleEndian(data); - - rPal[0] = (byte)block; - rPal[1] = (byte)(block >> 8); - - BCnLerpAlphaUnorm(rPal); - BCnDecodeTileAlphaRgba(tile, rPal, block >> 16); - - if ((copyWidth | copyHeight) == 4) - { - outputLine0[x] = tileAsVector128[0]; - outputLine1[x] = tileAsVector128[1]; - outputLine2[x] = tileAsVector128[2]; - outputLine3[x] = tileAsVector128[3]; - } - else - { - int pixelBaseOOffs = lineBaseOOffs + baseX; - - for (int tY = 0; tY < copyHeight; tY++) - { - tileAsUint.Slice(tY * 4, copyWidth).CopyTo(outputAsUint.Slice(pixelBaseOOffs + width * tY, copyWidth)); - } - } - - data = data.Slice(16); - } - } - - imageBaseOOffs += width * height; - } - } - - width = Math.Max(1, width >> 1); - height = Math.Max(1, height >> 1); - depth = Math.Max(1, depth >> 1); - } - - return output; - } - - public static byte[] DecodeBC4(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers, bool signed) - { - int size = 0; - - for (int l = 0; l < levels; l++) - { - size += BitUtils.AlignUp(Math.Max(1, width >> l), 4) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers; - } - - // Backends currently expect a stride alignment of 4 bytes, so output width must be aligned. 
- int alignedWidth = BitUtils.AlignUp(width, 4); - - byte[] output = new byte[size]; - Span<byte> outputSpan = new Span<byte>(output); - - ReadOnlySpan<ulong> data64 = MemoryMarshal.Cast<byte, ulong>(data); - - Span<byte> tile = stackalloc byte[BlockWidth * BlockHeight]; - Span<byte> rPal = stackalloc byte[8]; - - Span<uint> tileAsUint = MemoryMarshal.Cast<byte, uint>(tile); - - Span<uint> outputLine0 = default; - Span<uint> outputLine1 = default; - Span<uint> outputLine2 = default; - Span<uint> outputLine3 = default; - - int imageBaseOOffs = 0; - - for (int l = 0; l < levels; l++) - { - int w = BitUtils.DivRoundUp(width, BlockWidth); - int h = BitUtils.DivRoundUp(height, BlockHeight); - - for (int l2 = 0; l2 < layers; l2++) - { - for (int z = 0; z < depth; z++) - { - for (int y = 0; y < h; y++) - { - int baseY = y * BlockHeight; - int copyHeight = Math.Min(BlockHeight, height - baseY); - int lineBaseOOffs = imageBaseOOffs + baseY * alignedWidth; - - if (copyHeight == 4) - { - outputLine0 = MemoryMarshal.Cast<byte, uint>(outputSpan.Slice(lineBaseOOffs)); - outputLine1 = MemoryMarshal.Cast<byte, uint>(outputSpan.Slice(lineBaseOOffs + alignedWidth)); - outputLine2 = MemoryMarshal.Cast<byte, uint>(outputSpan.Slice(lineBaseOOffs + alignedWidth * 2)); - outputLine3 = MemoryMarshal.Cast<byte, uint>(outputSpan.Slice(lineBaseOOffs + alignedWidth * 3)); - } - - for (int x = 0; x < w; x++) - { - int baseX = x * BlockWidth; - int copyWidth = Math.Min(BlockWidth, width - baseX); - - ulong block = data64[0]; - - rPal[0] = (byte)block; - rPal[1] = (byte)(block >> 8); - - if (signed) - { - BCnLerpAlphaSnorm(rPal); - } - else - { - BCnLerpAlphaUnorm(rPal); - } - - BCnDecodeTileAlpha(tile, rPal, block >> 16); - - if ((copyWidth | copyHeight) == 4) - { - outputLine0[x] = tileAsUint[0]; - outputLine1[x] = tileAsUint[1]; - outputLine2[x] = tileAsUint[2]; - outputLine3[x] = tileAsUint[3]; - } - else - { - int pixelBaseOOffs = lineBaseOOffs + baseX; - - for (int tY = 0; tY < copyHeight; 
tY++) - { - tile.Slice(tY * 4, copyWidth).CopyTo(outputSpan.Slice(pixelBaseOOffs + alignedWidth * tY, copyWidth)); - } - } - - data64 = data64.Slice(1); - } - } - - imageBaseOOffs += alignedWidth * height; - } - } - - width = Math.Max(1, width >> 1); - height = Math.Max(1, height >> 1); - depth = Math.Max(1, depth >> 1); - - alignedWidth = BitUtils.AlignUp(width, 4); - } - - return output; - } - - public static byte[] DecodeBC5(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers, bool signed) - { - int size = 0; - - for (int l = 0; l < levels; l++) - { - size += BitUtils.AlignUp(Math.Max(1, width >> l), 2) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 2; - } - - // Backends currently expect a stride alignment of 4 bytes, so output width must be aligned. - int alignedWidth = BitUtils.AlignUp(width, 2); - - byte[] output = new byte[size]; - - ReadOnlySpan<ulong> data64 = MemoryMarshal.Cast<byte, ulong>(data); - - Span<byte> rTile = stackalloc byte[BlockWidth * BlockHeight * 2]; - Span<byte> gTile = stackalloc byte[BlockWidth * BlockHeight * 2]; - Span<byte> rPal = stackalloc byte[8]; - Span<byte> gPal = stackalloc byte[8]; - - Span<ushort> outputAsUshort = MemoryMarshal.Cast<byte, ushort>(output); - - Span<uint> rTileAsUint = MemoryMarshal.Cast<byte, uint>(rTile); - Span<uint> gTileAsUint = MemoryMarshal.Cast<byte, uint>(gTile); - - Span<ulong> outputLine0 = default; - Span<ulong> outputLine1 = default; - Span<ulong> outputLine2 = default; - Span<ulong> outputLine3 = default; - - int imageBaseOOffs = 0; - - for (int l = 0; l < levels; l++) - { - int w = BitUtils.DivRoundUp(width, BlockWidth); - int h = BitUtils.DivRoundUp(height, BlockHeight); - - for (int l2 = 0; l2 < layers; l2++) - { - for (int z = 0; z < depth; z++) - { - for (int y = 0; y < h; y++) - { - int baseY = y * BlockHeight; - int copyHeight = Math.Min(BlockHeight, height - baseY); - int lineBaseOOffs = imageBaseOOffs + baseY * alignedWidth; - - if 
(copyHeight == 4) - { - outputLine0 = MemoryMarshal.Cast<ushort, ulong>(outputAsUshort.Slice(lineBaseOOffs)); - outputLine1 = MemoryMarshal.Cast<ushort, ulong>(outputAsUshort.Slice(lineBaseOOffs + alignedWidth)); - outputLine2 = MemoryMarshal.Cast<ushort, ulong>(outputAsUshort.Slice(lineBaseOOffs + alignedWidth * 2)); - outputLine3 = MemoryMarshal.Cast<ushort, ulong>(outputAsUshort.Slice(lineBaseOOffs + alignedWidth * 3)); - } - - for (int x = 0; x < w; x++) - { - int baseX = x * BlockWidth; - int copyWidth = Math.Min(BlockWidth, width - baseX); - - ulong blockL = data64[0]; - ulong blockH = data64[1]; - - rPal[0] = (byte)blockL; - rPal[1] = (byte)(blockL >> 8); - gPal[0] = (byte)blockH; - gPal[1] = (byte)(blockH >> 8); - - if (signed) - { - BCnLerpAlphaSnorm(rPal); - BCnLerpAlphaSnorm(gPal); - } - else - { - BCnLerpAlphaUnorm(rPal); - BCnLerpAlphaUnorm(gPal); - } - - BCnDecodeTileAlpha(rTile, rPal, blockL >> 16); - BCnDecodeTileAlpha(gTile, gPal, blockH >> 16); - - if ((copyWidth | copyHeight) == 4) - { - outputLine0[x] = InterleaveBytes(rTileAsUint[0], gTileAsUint[0]); - outputLine1[x] = InterleaveBytes(rTileAsUint[1], gTileAsUint[1]); - outputLine2[x] = InterleaveBytes(rTileAsUint[2], gTileAsUint[2]); - outputLine3[x] = InterleaveBytes(rTileAsUint[3], gTileAsUint[3]); - } - else - { - int pixelBaseOOffs = lineBaseOOffs + baseX; - - for (int tY = 0; tY < copyHeight; tY++) - { - int line = pixelBaseOOffs + alignedWidth * tY; - - for (int tX = 0; tX < copyWidth; tX++) - { - int texel = tY * BlockWidth + tX; - - outputAsUshort[line + tX] = (ushort)(rTile[texel] | (gTile[texel] << 8)); - } - } - } - - data64 = data64.Slice(2); - } - } - - imageBaseOOffs += alignedWidth * height; - } - } - - width = Math.Max(1, width >> 1); - height = Math.Max(1, height >> 1); - depth = Math.Max(1, depth >> 1); - - alignedWidth = BitUtils.AlignUp(width, 2); - } - - return output; - } - - public static byte[] DecodeBC6(ReadOnlySpan<byte> data, int width, int height, int depth, int 
levels, int layers, bool signed) - { - int size = 0; - - for (int l = 0; l < levels; l++) - { - size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 8; - } - - byte[] output = new byte[size]; - - int inputOffset = 0; - int outputOffset = 0; - - for (int l = 0; l < levels; l++) - { - int w = BitUtils.DivRoundUp(width, BlockWidth); - int h = BitUtils.DivRoundUp(height, BlockHeight); - - for (int l2 = 0; l2 < layers; l2++) - { - for (int z = 0; z < depth; z++) - { - BC6Decoder.Decode(output.AsSpan().Slice(outputOffset), data.Slice(inputOffset), width, height, signed); - - inputOffset += w * h * 16; - outputOffset += width * height * 8; - } - } - - width = Math.Max(1, width >> 1); - height = Math.Max(1, height >> 1); - depth = Math.Max(1, depth >> 1); - } - - return output; - } - - public static byte[] DecodeBC7(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers) - { - int size = 0; - - for (int l = 0; l < levels; l++) - { - size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4; - } - - byte[] output = new byte[size]; - - int inputOffset = 0; - int outputOffset = 0; - - for (int l = 0; l < levels; l++) - { - int w = BitUtils.DivRoundUp(width, BlockWidth); - int h = BitUtils.DivRoundUp(height, BlockHeight); - - for (int l2 = 0; l2 < layers; l2++) - { - for (int z = 0; z < depth; z++) - { - BC7Decoder.Decode(output.AsSpan().Slice(outputOffset), data.Slice(inputOffset), width, height); - - inputOffset += w * h * 16; - outputOffset += width * height * 4; - } - } - - width = Math.Max(1, width >> 1); - height = Math.Max(1, height >> 1); - depth = Math.Max(1, depth >> 1); - } - - return output; - } - - private static ulong InterleaveBytes(uint left, uint right) - { - return InterleaveBytesWithZeros(left) | (InterleaveBytesWithZeros(right) << 8); - } - - private static ulong InterleaveBytesWithZeros(uint value) - { - ulong output = value; - output = (output ^ 
(output << 16)) & 0xffff0000ffffUL; - output = (output ^ (output << 8)) & 0xff00ff00ff00ffUL; - return output; - } - - private static void BCnLerpAlphaUnorm(Span<byte> alpha) - { - byte a0 = alpha[0]; - byte a1 = alpha[1]; - - if (a0 > a1) - { - alpha[2] = (byte)((6 * a0 + 1 * a1) / 7); - alpha[3] = (byte)((5 * a0 + 2 * a1) / 7); - alpha[4] = (byte)((4 * a0 + 3 * a1) / 7); - alpha[5] = (byte)((3 * a0 + 4 * a1) / 7); - alpha[6] = (byte)((2 * a0 + 5 * a1) / 7); - alpha[7] = (byte)((1 * a0 + 6 * a1) / 7); - } - else - { - alpha[2] = (byte)((4 * a0 + 1 * a1) / 5); - alpha[3] = (byte)((3 * a0 + 2 * a1) / 5); - alpha[4] = (byte)((2 * a0 + 3 * a1) / 5); - alpha[5] = (byte)((1 * a0 + 4 * a1) / 5); - alpha[6] = 0; - alpha[7] = 0xff; - } - } - - private static void BCnLerpAlphaSnorm(Span<byte> alpha) - { - sbyte a0 = (sbyte)alpha[0]; - sbyte a1 = (sbyte)alpha[1]; - - if (a0 > a1) - { - alpha[2] = (byte)((6 * a0 + 1 * a1) / 7); - alpha[3] = (byte)((5 * a0 + 2 * a1) / 7); - alpha[4] = (byte)((4 * a0 + 3 * a1) / 7); - alpha[5] = (byte)((3 * a0 + 4 * a1) / 7); - alpha[6] = (byte)((2 * a0 + 5 * a1) / 7); - alpha[7] = (byte)((1 * a0 + 6 * a1) / 7); - } - else - { - alpha[2] = (byte)((4 * a0 + 1 * a1) / 5); - alpha[3] = (byte)((3 * a0 + 2 * a1) / 5); - alpha[4] = (byte)((2 * a0 + 3 * a1) / 5); - alpha[5] = (byte)((1 * a0 + 4 * a1) / 5); - alpha[6] = 0x80; - alpha[7] = 0x7f; - } - } - - private unsafe static void BCnDecodeTileAlpha(Span<byte> output, Span<byte> rPal, ulong rI) - { - if (Avx2.IsSupported) - { - Span<Vector128<byte>> outputAsVector128 = MemoryMarshal.Cast<byte, Vector128<byte>>(output); - - Vector128<uint> shifts = Vector128.Create(0u, 3u, 6u, 9u); - Vector128<uint> masks = Vector128.Create(7u); - - Vector128<byte> vClut; - - fixed (byte* pRPal = rPal) - { - vClut = Sse2.LoadScalarVector128((ulong*)pRPal).AsByte(); - } - - Vector128<uint> indices0 = Vector128.Create((uint)rI); - Vector128<uint> indices1 = Vector128.Create((uint)(rI >> 24)); - Vector128<uint> indices00 
= Avx2.ShiftRightLogicalVariable(indices0, shifts); - Vector128<uint> indices10 = Avx2.ShiftRightLogicalVariable(indices1, shifts); - Vector128<uint> indices01 = Sse2.ShiftRightLogical(indices00, 12); - Vector128<uint> indices11 = Sse2.ShiftRightLogical(indices10, 12); - indices00 = Sse2.And(indices00, masks); - indices10 = Sse2.And(indices10, masks); - indices01 = Sse2.And(indices01, masks); - indices11 = Sse2.And(indices11, masks); - - Vector128<ushort> indicesW0 = Sse41.PackUnsignedSaturate(indices00.AsInt32(), indices01.AsInt32()); - Vector128<ushort> indicesW1 = Sse41.PackUnsignedSaturate(indices10.AsInt32(), indices11.AsInt32()); - - Vector128<byte> indices = Sse2.PackUnsignedSaturate(indicesW0.AsInt16(), indicesW1.AsInt16()); - - outputAsVector128[0] = Ssse3.Shuffle(vClut, indices); - } - else - { - for (int i = 0; i < BlockWidth * BlockHeight; i++, rI >>= 3) - { - output[i] = rPal[(int)(rI & 7)]; - } - } - } - - private unsafe static void BCnDecodeTileAlphaRgba(Span<byte> output, Span<byte> rPal, ulong rI) - { - if (Avx2.IsSupported) - { - Span<Vector256<uint>> outputAsVector256 = MemoryMarshal.Cast<byte, Vector256<uint>>(output); - - Vector256<uint> shifts = Vector256.Create(0u, 3u, 6u, 9u, 12u, 15u, 18u, 21u); - - Vector128<uint> vClut128; - - fixed (byte* pRPal = rPal) - { - vClut128 = Sse2.LoadScalarVector128((ulong*)pRPal).AsUInt32(); - } - - Vector256<uint> vClut = Avx2.ConvertToVector256Int32(vClut128.AsByte()).AsUInt32(); - vClut = Avx2.ShiftLeftLogical(vClut, 24); - - Vector256<uint> indices0 = Vector256.Create((uint)rI); - Vector256<uint> indices1 = Vector256.Create((uint)(rI >> 24)); - - indices0 = Avx2.ShiftRightLogicalVariable(indices0, shifts); - indices1 = Avx2.ShiftRightLogicalVariable(indices1, shifts); - - outputAsVector256[0] = Avx2.Or(outputAsVector256[0], Avx2.PermuteVar8x32(vClut, indices0)); - outputAsVector256[1] = Avx2.Or(outputAsVector256[1], Avx2.PermuteVar8x32(vClut, indices1)); - } - else - { - for (int i = 3; i < BlockWidth * 
BlockHeight * 4; i += 4, rI >>= 3) - { - output[i] = rPal[(int)(rI & 7)]; - } - } - } - - private unsafe static void BC1DecodeTileRgb(Span<byte> output, ReadOnlySpan<byte> input) - { - Span<uint> clut = stackalloc uint[4]; - - uint c0c1 = BinaryPrimitives.ReadUInt32LittleEndian(input); - uint c0 = (ushort)c0c1; - uint c1 = (ushort)(c0c1 >> 16); - - clut[0] = ConvertRgb565ToRgb888(c0) | 0xff000000; - clut[1] = ConvertRgb565ToRgb888(c1) | 0xff000000; - clut[2] = BC1LerpRgb2(clut[0], clut[1], c0, c1); - clut[3] = BC1LerpRgb3(clut[0], clut[1], c0, c1); - - BCnDecodeTileRgb(clut, output, input); - } - - private unsafe static void BC23DecodeTileRgb(Span<byte> output, ReadOnlySpan<byte> input) - { - Span<uint> clut = stackalloc uint[4]; - - uint c0c1 = BinaryPrimitives.ReadUInt32LittleEndian(input); - uint c0 = (ushort)c0c1; - uint c1 = (ushort)(c0c1 >> 16); - - clut[0] = ConvertRgb565ToRgb888(c0); - clut[1] = ConvertRgb565ToRgb888(c1); - clut[2] = BC23LerpRgb2(clut[0], clut[1]); - clut[3] = BC23LerpRgb3(clut[0], clut[1]); - - BCnDecodeTileRgb(clut, output, input); - } - - private unsafe static void BCnDecodeTileRgb(Span<uint> clut, Span<byte> output, ReadOnlySpan<byte> input) - { - if (Avx2.IsSupported) - { - Span<Vector256<uint>> outputAsVector256 = MemoryMarshal.Cast<byte, Vector256<uint>>(output); - - Vector256<uint> shifts0 = Vector256.Create(0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u); - Vector256<uint> shifts1 = Vector256.Create(16u, 18u, 20u, 22u, 24u, 26u, 28u, 30u); - Vector256<uint> masks = Vector256.Create(3u); - - Vector256<uint> vClut; - - fixed (uint* pClut = &clut[0]) - { - vClut = Sse2.LoadVector128(pClut).ToVector256Unsafe(); - } - - Vector256<uint> indices0; - - fixed (byte* pInput = input) - { - indices0 = Avx2.BroadcastScalarToVector256((uint*)(pInput + 4)); - } - - Vector256<uint> indices1 = indices0; - - indices0 = Avx2.ShiftRightLogicalVariable(indices0, shifts0); - indices1 = Avx2.ShiftRightLogicalVariable(indices1, shifts1); - indices0 = 
Avx2.And(indices0, masks); - indices1 = Avx2.And(indices1, masks); - - outputAsVector256[0] = Avx2.PermuteVar8x32(vClut, indices0); - outputAsVector256[1] = Avx2.PermuteVar8x32(vClut, indices1); - } - else - { - Span<uint> outputAsUint = MemoryMarshal.Cast<byte, uint>(output); - - uint indices = BinaryPrimitives.ReadUInt32LittleEndian(input.Slice(4)); - - for (int i = 0; i < BlockWidth * BlockHeight; i++, indices >>= 2) - { - outputAsUint[i] = clut[(int)(indices & 3)]; - } - } - } - - private static uint BC1LerpRgb2(uint color0, uint color1, uint c0, uint c1) - { - if (c0 > c1) - { - return BC23LerpRgb2(color0, color1) | 0xff000000; - } - - uint carry = color0 & color1; - uint addHalve = ((color0 ^ color1) >> 1) & 0x7f7f7f; - return (addHalve + carry) | 0xff000000; - } - - private static uint BC23LerpRgb2(uint color0, uint color1) - { - uint r0 = (byte)color0; - uint g0 = color0 & 0xff00; - uint b0 = color0 & 0xff0000; - - uint r1 = (byte)color1; - uint g1 = color1 & 0xff00; - uint b1 = color1 & 0xff0000; - - uint mixR = (2 * r0 + r1) / 3; - uint mixG = (2 * g0 + g1) / 3; - uint mixB = (2 * b0 + b1) / 3; - - return mixR | (mixG & 0xff00) | (mixB & 0xff0000); - } - - private static uint BC1LerpRgb3(uint color0, uint color1, uint c0, uint c1) - { - if (c0 > c1) - { - return BC23LerpRgb3(color0, color1) | 0xff000000; - } - - return 0; - } - - private static uint BC23LerpRgb3(uint color0, uint color1) - { - uint r0 = (byte)color0; - uint g0 = color0 & 0xff00; - uint b0 = color0 & 0xff0000; - - uint r1 = (byte)color1; - uint g1 = color1 & 0xff00; - uint b1 = color1 & 0xff0000; - - uint mixR = (2 * r1 + r0) / 3; - uint mixG = (2 * g1 + g0) / 3; - uint mixB = (2 * b1 + b0) / 3; - - return mixR | (mixG & 0xff00) | (mixB & 0xff0000); - } - - private static uint ConvertRgb565ToRgb888(uint value) - { - uint b = (value & 0x1f) << 19; - uint g = (value << 5) & 0xfc00; - uint r = (value >> 8) & 0xf8; - - b |= b >> 5; - g |= g >> 6; - r |= r >> 5; - - return r | (g & 0xff00) | (b 
& 0xff0000); - } - } -}
// File: Ryujinx.Graphics.Texture/BCnEncoder.cs
// (listed by the enclosing diff as a deleted file: mode 100644, index 02b79c1b..00000000)
using Ryujinx.Common;
using Ryujinx.Graphics.Texture.Encoders;
using System;

namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Entry points for compressing uncompressed texture data into block-compressed (BCn) formats.
    /// </summary>
    public static class BCnEncoder
    {
        private const int BlockWidth = 4;
        private const int BlockHeight = 4;

        // Each encoded block occupies 16 bytes of output.
        private const int BytesPerBlock = 16;

        // The input is consumed at 4 bytes per pixel (presumably RGBA8 -- confirm against callers).
        private const int BytesPerInputPixel = 4;

        /// <summary>
        /// Encodes every mip level, layer and depth slice of a texture to BC7.
        /// </summary>
        /// <remarks>
        /// Slices are read from <paramref name="data"/> and written to the result back to back,
        /// iterating levels in the outer loop, layers in the middle loop and depth slices in the inner loop.
        /// Width, height and depth are halved (never going below 1) after each level.
        /// </remarks>
        /// <param name="data">Tightly packed source pixels, 4 bytes per pixel</param>
        /// <param name="width">Width of the first level, in pixels</param>
        /// <param name="height">Height of the first level, in pixels</param>
        /// <param name="depth">Depth of the first level, in slices</param>
        /// <param name="levels">Number of mip levels to encode</param>
        /// <param name="layers">Number of array layers to encode</param>
        /// <returns>The encoded data, 16 bytes per 4x4 block</returns>
        public static byte[] EncodeBC7(byte[] data, int width, int height, int depth, int levels, int layers)
        {
            // First pass: total size of the encoded data across all levels.
            int size = 0;

            for (int l = 0; l < levels; l++)
            {
                int w = BitUtils.DivRoundUp(Math.Max(1, width >> l), BlockWidth);
                int h = BitUtils.DivRoundUp(Math.Max(1, height >> l), BlockHeight);

                size += w * h * BytesPerBlock * Math.Max(1, depth >> l) * layers;
            }

            byte[] output = new byte[size];

            // Wrap both buffers once instead of once per slice.
            Memory<byte> outputMemory = output.AsMemory();
            Memory<byte> inputMemory = data.AsMemory();

            int imageBaseIOffs = 0;
            int imageBaseOOffs = 0;

            for (int l = 0; l < levels; l++)
            {
                int w = BitUtils.DivRoundUp(width, BlockWidth);
                int h = BitUtils.DivRoundUp(height, BlockHeight);

                for (int l2 = 0; l2 < layers; l2++)
                {
                    for (int z = 0; z < depth; z++)
                    {
                        BC7Encoder.Encode(
                            outputMemory.Slice(imageBaseOOffs),
                            inputMemory.Slice(imageBaseIOffs),
                            width,
                            height,
                            EncodeMode.Fast | EncodeMode.Multithreaded);

                        // Advance past the slice that was just consumed and the blocks that were just produced.
                        imageBaseIOffs += width * height * BytesPerInputPixel;
                        imageBaseOOffs += w * h * BytesPerBlock;
                    }
                }

                width = Math.Max(1, width >> 1);
                height = Math.Max(1, height >> 1);
                depth = Math.Max(1, depth >> 1);
            }

            return output;
        }
    }
}
// File: Ryujinx.Graphics.Texture/BlockLinearConstants.cs
// (listed by the enclosing diff as a deleted file: mode 100644, index d95691cf..00000000)
namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// GOB dimensions shared by the block linear addressing code.
    /// </summary>
    internal static class BlockLinearConstants
    {
        /// <summary>
        /// Length of one GOB row. BlockLinearLayout divides byte line offsets by this value.
        /// </summary>
        public const int GobStride = 64;

        /// <summary>
        /// Number of rows in one GOB. BlockLinearLayout divides Y coordinates by this value.
        /// </summary>
        public const int GobHeight = 8;

        /// <summary>
        /// Total size of one GOB (rows times row length, 512).
        /// </summary>
        public const int GobSize = GobHeight * GobStride;
    }
}
// File: Ryujinx.Graphics.Texture/BlockLinearLayout.cs
// (listed by the enclosing diff as a deleted file: mode 100644, index e098e959..00000000)
using Ryujinx.Common;
using System.Numerics;
using System.Runtime.CompilerServices;

using static Ryujinx.Graphics.Texture.BlockLinearConstants;

namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Converts texture coordinates into byte offsets inside a block linear image.
    /// </summary>
    /// <remarks>
    /// Offsets can be computed in one go with <see cref="GetOffset(int, int, int)"/>, or incrementally:
    /// <see cref="SetY"/> and <see cref="SetZ"/> cache the Y and Z contributions on the instance,
    /// and the single-argument getters then only add the X contribution.
    /// NOTE(review): masks are derived as "count - 1" and shifts as trailing zero counts, so
    /// gobBlocksInY, gobBlocksInZ and bpp are presumably expected to be powers of two -- confirm with callers.
    /// </remarks>
    class BlockLinearLayout
    {
        private readonly int _texBpp;

        private readonly int _bhMask;
        private readonly int _bdMask;

        private readonly int _bhShift;
        private readonly int _bdShift;
        private readonly int _bppShift;

        private readonly int _xShift;

        private readonly int _robSize;
        private readonly int _sliceSize;

        // State for the built in iteration (see SetY, SetZ and the single-argument getters).
        private int _yPart;
        private int _yzPart;
        private int _zPart;

        /// <summary>
        /// Creates the layout for an image with the given dimensions and block configuration.
        /// </summary>
        /// <param name="width">Image width, in pixels</param>
        /// <param name="height">Image height, in pixels</param>
        /// <param name="gobBlocksInY">Block height, in GOBs</param>
        /// <param name="gobBlocksInZ">Block depth, in GOBs</param>
        /// <param name="bpp">Bytes per pixel</param>
        public BlockLinearLayout(
            int width,
            int height,
            int gobBlocksInY,
            int gobBlocksInZ,
            int bpp)
        {
            _texBpp = bpp;
            _bppShift = BitOperations.TrailingZeroCount(bpp);

            _bhMask = gobBlocksInY - 1;
            _bhShift = BitOperations.TrailingZeroCount(gobBlocksInY);

            _bdMask = gobBlocksInZ - 1;
            _bdShift = BitOperations.TrailingZeroCount(gobBlocksInZ);

            // Bytes in one block; stepping one GOB along X moves by this amount.
            int blockSize = GobSize * gobBlocksInY * gobBlocksInZ;

            _xShift = BitOperations.TrailingZeroCount(blockSize);

            int widthInGobs = BitUtils.DivRoundUp(width * bpp, GobStride);
            int heightInBlocks = BitUtils.DivRoundUp(height, gobBlocksInY * GobHeight);

            // One row of blocks across the whole image width, then all rows of one slice.
            _robSize = blockSize * widthInGobs;
            _sliceSize = heightInBlocks * _robSize;
        }

        /// <summary>
        /// Gets the byte offset of the pixel at the given coordinates.
        /// </summary>
        /// <param name="x">X coordinate, in pixels</param>
        /// <param name="y">Y coordinate, in pixels</param>
        /// <param name="z">Z coordinate, in pixels</param>
        /// <returns>Byte offset inside the image</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetOffset(int x, int y, int z) => GetOffsetWithLineOffset(x << _bppShift, y, z);

        /// <summary>
        /// Gets the byte offset for a position whose X component is already expressed in bytes.
        /// </summary>
        /// <param name="x">Offset in bytes from the start of the line</param>
        /// <param name="y">Y coordinate, in pixels</param>
        /// <param name="z">Z coordinate, in pixels</param>
        /// <returns>Byte offset inside the image</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetOffsetWithLineOffset(int x, int y, int z)
        {
            return ComputeZPart(z) + ComputeYPart(y) + ComputeXPart16(x) + (x & 0x0f);
        }

        /// <summary>
        /// Gets the byte range that covers a rectangle of pixels on the first depth slice.
        /// </summary>
        /// <param name="x">X coordinate of the rectangle, in pixels</param>
        /// <param name="y">Y coordinate of the rectangle, in pixels</param>
        /// <param name="width">Rectangle width, in pixels</param>
        /// <param name="height">Rectangle height, in pixels</param>
        /// <returns>Start offset and size in bytes of the range</returns>
        public (int offset, int size) GetRectangleRange(int x, int y, int width, int height)
        {
            // Justification:
            // The 2D offset is a combination of separate x and y parts.
            // Both components increase with input and never overlap bits.
            // Therefore for each component, the minimum input value is the lowest that component can go.
            // Minimum total value is minimum X component + minimum Y component. Similar goes for maximum.

            int first = GetOffset(x, y, 0);
            int lastPixel = GetOffset(x + width - 1, y + height - 1, 0);

            // Adding the pixel size makes the range cover the last pixel as well.
            return (first, lastPixel + _texBpp - first);
        }

        /// <summary>
        /// Checks whether another layout has the same pixel size, block masks, and row and slice sizes.
        /// </summary>
        /// <param name="other">Layout to compare with</param>
        /// <returns>True if all compared values are equal, false otherwise</returns>
        public bool LayoutMatches(BlockLinearLayout other)
        {
            if (_robSize != other._robSize || _sliceSize != other._sliceSize)
            {
                return false;
            }

            return _texBpp == other._texBpp &&
                   _bhMask == other._bhMask &&
                   _bdMask == other._bdMask;
        }

        // Functions for built in iteration.
        // Components of the offset can be updated separately, and combined to save some time.

        /// <summary>
        /// Caches the Y contribution used by the single-argument offset getters.
        /// </summary>
        /// <param name="y">Y coordinate, in pixels</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void SetY(int y)
        {
            _yPart = ComputeYPart(y);
            _yzPart = _yPart + _zPart;
        }

        /// <summary>
        /// Caches the Z contribution used by the single-argument offset getters.
        /// </summary>
        /// <param name="z">Z coordinate, in pixels</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void SetZ(int z)
        {
            _zPart = ComputeZPart(z);
            _yzPart = _zPart + _yPart;
        }

        /// <summary>
        /// Optimized conversion for line offset in bytes to an absolute offset. Input x must be divisible by 16.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetOffsetWithLineOffset16(int x) => ComputeXPart16(x) + _yzPart;

        /// <summary>
        /// Optimized conversion for line offset in bytes to an absolute offset. Input x must be divisible by 64.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetOffsetWithLineOffset64(int x) => ((x / GobStride) << _xShift) + _yzPart;

        /// <summary>
        /// Gets the byte offset of a pixel on the line and slice last selected with SetY and SetZ.
        /// </summary>
        /// <param name="x">X coordinate, in pixels</param>
        /// <returns>Byte offset inside the image</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetOffset(int x)
        {
            int lineOffset = x << _bppShift;

            return ComputeXPart16(lineOffset) + (lineOffset & 0x0f) + _yzPart;
        }

        /// <summary>
        /// X contribution to the offset, excluding the low 4 bits of the byte line offset.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private int ComputeXPart16(int lineOffset)
        {
            return ((lineOffset / GobStride) << _xShift)
                + (((lineOffset & 0x3f) >> 5) << 8)
                + (((lineOffset & 0x1f) >> 4) << 5);
        }

        /// <summary>
        /// Y contribution to the offset: row of blocks, GOB inside the block, then row bits inside the GOB.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private int ComputeYPart(int y)
        {
            int gobRow = y / GobHeight;

            return ((gobRow >> _bhShift) * _robSize)
                + ((gobRow & _bhMask) * GobSize)
                + (((y & 0x07) >> 1) << 6)
                + ((y & 0x01) << 4);
        }

        /// <summary>
        /// Z contribution to the offset: slice of blocks, then position inside the block depth.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private int ComputeZPart(int z)
        {
            return ((z >> _bdShift) * _sliceSize) + (((z & _bdMask) * GobSize) << _bhShift);
        }
    }
}
\ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/Bpp12Pixel.cs b/Ryujinx.Graphics.Texture/Bpp12Pixel.cs deleted file mode 100644 index 5a38259e..00000000 --- a/Ryujinx.Graphics.Texture/Bpp12Pixel.cs +++ /dev/null @@ -1,11 +0,0 @@ -using System.Runtime.InteropServices; - -namespace Ryujinx.Graphics.Texture -{ - [StructLayout(LayoutKind.Sequential, Pack = 1, Size = 12)] - public struct Bpp12Pixel - { - private ulong _elem1; - private uint _elem2; - } -} diff --git a/Ryujinx.Graphics.Texture/ETC2Decoder.cs b/Ryujinx.Graphics.Texture/ETC2Decoder.cs deleted file mode 100644 index 21ff4be4..00000000 --- a/Ryujinx.Graphics.Texture/ETC2Decoder.cs +++ /dev/null @@ -1,682 +0,0 @@ -using Ryujinx.Common; -using System; -using System.Buffers.Binary; -using System.Runtime.InteropServices; - -namespace Ryujinx.Graphics.Texture -{ - public static class ETC2Decoder - { - private const uint AlphaMask = 0xff000000u; - - private const int BlockWidth = 4; - private const int BlockHeight = 4; - - private static readonly int[][] _etc1Lut = - { - new int[] { 2, 8, -2, -8 }, - new int[] { 5, 17, -5, -17 }, - new int[] { 9, 29, -9, -29 }, - new int[] { 13, 42, -13, -42 }, - new int[] { 18, 60, -18, -60 }, - new int[] { 24, 80, -24, -80 }, - new int[] { 33, 106, -33, -106 }, - new int[] { 47, 183, -47, -183 } - }; - - private static readonly int[] _etc2Lut = - { - 3, 6, 11, 16, 23, 32, 41, 64 - }; - - private static readonly int[][] _etc2AlphaLut = - { - new int[] { -3, -6, -9, -15, 2, 5, 8, 14 }, - new int[] { -3, -7, -10, -13, 2, 6, 9, 12 }, - new int[] { -2, -5, -8, -13, 1, 4, 7, 12 }, - new int[] { -2, -4, -6, -13, 1, 3, 5, 12 }, - new int[] { -3, -6, -8, -12, 2, 5, 7, 11 }, - new int[] { -3, -7, -9, -11, 2, 6, 8, 10 }, - new int[] { -4, -7, -8, -11, 3, 6, 7, 10 }, - new int[] { -3, -5, -8, -11, 2, 4, 7, 10 }, - new int[] { -2, -6, -8, -10, 1, 5, 7, 9 }, - new int[] { -2, -5, -8, -10, 1, 4, 7, 9 }, - new int[] { -2, -4, -8, -10, 1, 3, 7, 9 }, - new int[] { -2, -5, -7, 
-10, 1, 4, 6, 9 }, - new int[] { -3, -4, -7, -10, 2, 3, 6, 9 }, - new int[] { -1, -2, -3, -10, 0, 1, 2, 9 }, - new int[] { -4, -6, -8, -9, 3, 5, 7, 8 }, - new int[] { -3, -5, -7, -9, 2, 4, 6, 8 } - }; - - public static byte[] DecodeRgb(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers) - { - ReadOnlySpan<ulong> dataUlong = MemoryMarshal.Cast<byte, ulong>(data); - - int inputOffset = 0; - - byte[] output = new byte[CalculateOutputSize(width, height, depth, levels, layers)]; - - Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output); - Span<uint> tile = stackalloc uint[BlockWidth * BlockHeight]; - - int imageBaseOOffs = 0; - - for (int l = 0; l < levels; l++) - { - int wInBlocks = BitUtils.DivRoundUp(width, BlockWidth); - int hInBlocks = BitUtils.DivRoundUp(height, BlockHeight); - - for (int l2 = 0; l2 < layers; l2++) - { - for (int z = 0; z < depth; z++) - { - for (int y = 0; y < hInBlocks; y++) - { - int ty = y * BlockHeight; - int bh = Math.Min(BlockHeight, height - ty); - - for (int x = 0; x < wInBlocks; x++) - { - int tx = x * BlockWidth; - int bw = Math.Min(BlockWidth, width - tx); - - ulong colorBlock = dataUlong[inputOffset++]; - - DecodeBlock(tile, colorBlock); - - for (int py = 0; py < bh; py++) - { - int oOffsBase = imageBaseOOffs + ((ty + py) * width) + tx; - - for (int px = 0; px < bw; px++) - { - int oOffs = oOffsBase + px; - - outputUint[oOffs] = tile[py * BlockWidth + px] | AlphaMask; - } - } - } - } - - imageBaseOOffs += width * height; - } - } - - width = Math.Max(1, width >> 1); - height = Math.Max(1, height >> 1); - depth = Math.Max(1, depth >> 1); - } - - return output; - } - - public static byte[] DecodePta(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers) - { - ReadOnlySpan<ulong> dataUlong = MemoryMarshal.Cast<byte, ulong>(data); - - int inputOffset = 0; - - byte[] output = new byte[CalculateOutputSize(width, height, depth, levels, layers)]; - - Span<uint> outputUint = 
MemoryMarshal.Cast<byte, uint>(output); - Span<uint> tile = stackalloc uint[BlockWidth * BlockHeight]; - - int imageBaseOOffs = 0; - - for (int l = 0; l < levels; l++) - { - int wInBlocks = BitUtils.DivRoundUp(width, BlockWidth); - int hInBlocks = BitUtils.DivRoundUp(height, BlockHeight); - - for (int l2 = 0; l2 < layers; l2++) - { - for (int z = 0; z < depth; z++) - { - for (int y = 0; y < hInBlocks; y++) - { - int ty = y * BlockHeight; - int bh = Math.Min(BlockHeight, height - ty); - - for (int x = 0; x < wInBlocks; x++) - { - int tx = x * BlockWidth; - int bw = Math.Min(BlockWidth, width - tx); - - ulong colorBlock = dataUlong[inputOffset++]; - - DecodeBlockPta(tile, colorBlock); - - for (int py = 0; py < bh; py++) - { - int oOffsBase = imageBaseOOffs + ((ty + py) * width) + tx; - - tile.Slice(py * BlockWidth, bw).CopyTo(outputUint.Slice(oOffsBase, bw)); - } - } - } - - imageBaseOOffs += width * height; - } - } - - width = Math.Max(1, width >> 1); - height = Math.Max(1, height >> 1); - depth = Math.Max(1, depth >> 1); - } - - return output; - } - - public static byte[] DecodeRgba(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers) - { - ReadOnlySpan<ulong> dataUlong = MemoryMarshal.Cast<byte, ulong>(data); - - int inputOffset = 0; - - byte[] output = new byte[CalculateOutputSize(width, height, depth, levels, layers)]; - - Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output); - Span<uint> tile = stackalloc uint[BlockWidth * BlockHeight]; - - int imageBaseOOffs = 0; - - for (int l = 0; l < levels; l++) - { - int wInBlocks = BitUtils.DivRoundUp(width, BlockWidth); - int hInBlocks = BitUtils.DivRoundUp(height, BlockHeight); - - for (int l2 = 0; l2 < layers; l2++) - { - for (int z = 0; z < depth; z++) - { - for (int y = 0; y < hInBlocks; y++) - { - int ty = y * BlockHeight; - int bh = Math.Min(BlockHeight, height - ty); - - for (int x = 0; x < wInBlocks; x++) - { - int tx = x * BlockWidth; - int bw = Math.Min(BlockWidth, width 
- tx); - - ulong alphaBlock = dataUlong[inputOffset]; - ulong colorBlock = dataUlong[inputOffset + 1]; - - inputOffset += 2; - - DecodeBlock(tile, colorBlock); - - byte alphaBase = (byte)alphaBlock; - int[] alphaTable = _etc2AlphaLut[(alphaBlock >> 8) & 0xf]; - int alphaMultiplier = (int)(alphaBlock >> 12) & 0xf; - ulong alphaIndices = BinaryPrimitives.ReverseEndianness(alphaBlock); - - if (alphaMultiplier != 0) - { - for (int py = 0; py < bh; py++) - { - int oOffsBase = imageBaseOOffs + ((ty + py) * width) + tx; - - for (int px = 0; px < bw; px++) - { - int oOffs = oOffsBase + px; - int alphaIndex = (int)((alphaIndices >> (((px * BlockHeight + py) ^ 0xf) * 3)) & 7); - - byte a = Saturate(alphaBase + alphaTable[alphaIndex] * alphaMultiplier); - - outputUint[oOffs] = tile[py * BlockWidth + px] | ((uint)a << 24); - } - } - } - else - { - uint a = (uint)alphaBase << 24; - - for (int py = 0; py < bh; py++) - { - int oOffsBase = imageBaseOOffs + ((ty + py) * width) + tx; - - for (int px = 0; px < bw; px++) - { - int oOffs = oOffsBase + px; - - outputUint[oOffs] = tile[py * BlockWidth + px] | a; - } - } - } - } - } - - imageBaseOOffs += width * height; - } - } - - width = Math.Max(1, width >> 1); - height = Math.Max(1, height >> 1); - depth = Math.Max(1, depth >> 1); - } - - return output; - } - - private static void DecodeBlock(Span<uint> tile, ulong block) - { - uint blockLow = (uint)(block >> 0); - uint blockHigh = (uint)(block >> 32); - - uint r1, g1, b1; - uint r2, g2, b2; - - bool differentialMode = (blockLow & 0x2000000) != 0; - - if (differentialMode) - { - (r1, g1, b1, r2, g2, b2) = UnpackRgb555DiffEndPoints(blockLow); - - if (r2 > 31) - { - DecodeBlock59T(tile, blockLow, blockHigh); - } - else if (g2 > 31) - { - DecodeBlock58H(tile, blockLow, blockHigh); - } - else if (b2 > 31) - { - DecodeBlock57P(tile, block); - } - else - { - r1 |= r1 >> 5; - g1 |= g1 >> 5; - b1 |= b1 >> 5; - - r2 = (r2 << 3) | (r2 >> 2); - g2 = (g2 << 3) | (g2 >> 2); - b2 = (b2 << 3) | (b2 
>> 2); - - DecodeBlockETC1(tile, blockLow, blockHigh, r1, g1, b1, r2, g2, b2); - } - } - else - { - r1 = (blockLow & 0x0000f0) >> 0; - g1 = (blockLow & 0x00f000) >> 8; - b1 = (blockLow & 0xf00000) >> 16; - - r2 = (blockLow & 0x00000f) << 4; - g2 = (blockLow & 0x000f00) >> 4; - b2 = (blockLow & 0x0f0000) >> 12; - - r1 |= r1 >> 4; - g1 |= g1 >> 4; - b1 |= b1 >> 4; - - r2 |= r2 >> 4; - g2 |= g2 >> 4; - b2 |= b2 >> 4; - - DecodeBlockETC1(tile, blockLow, blockHigh, r1, g1, b1, r2, g2, b2); - } - } - - private static void DecodeBlockPta(Span<uint> tile, ulong block) - { - uint blockLow = (uint)(block >> 0); - uint blockHigh = (uint)(block >> 32); - - (uint r1, uint g1, uint b1, uint r2, uint g2, uint b2) = UnpackRgb555DiffEndPoints(blockLow); - - bool fullyOpaque = (blockLow & 0x2000000) != 0; - - if (fullyOpaque) - { - if (r2 > 31) - { - DecodeBlock59T(tile, blockLow, blockHigh); - } - else if (g2 > 31) - { - DecodeBlock58H(tile, blockLow, blockHigh); - } - else if (b2 > 31) - { - DecodeBlock57P(tile, block); - } - else - { - r1 |= r1 >> 5; - g1 |= g1 >> 5; - b1 |= b1 >> 5; - - r2 = (r2 << 3) | (r2 >> 2); - g2 = (g2 << 3) | (g2 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - - DecodeBlockETC1(tile, blockLow, blockHigh, r1, g1, b1, r2, g2, b2); - } - - for (int i = 0; i < tile.Length; i++) - { - tile[i] |= AlphaMask; - } - } - else - { - if (r2 > 31) - { - DecodeBlock59T(tile, blockLow, blockHigh, AlphaMask); - } - else if (g2 > 31) - { - DecodeBlock58H(tile, blockLow, blockHigh, AlphaMask); - } - else if (b2 > 31) - { - DecodeBlock57P(tile, block); - - for (int i = 0; i < tile.Length; i++) - { - tile[i] |= AlphaMask; - } - } - else - { - r1 |= r1 >> 5; - g1 |= g1 >> 5; - b1 |= b1 >> 5; - - r2 = (r2 << 3) | (r2 >> 2); - g2 = (g2 << 3) | (g2 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - - DecodeBlockETC1(tile, blockLow, blockHigh, r1, g1, b1, r2, g2, b2, AlphaMask); - } - } - } - - private static (uint, uint, uint, uint, uint, uint) UnpackRgb555DiffEndPoints(uint blockLow) - { - uint r1 
= (blockLow & 0x0000f8) >> 0; - uint g1 = (blockLow & 0x00f800) >> 8; - uint b1 = (blockLow & 0xf80000) >> 16; - - uint r2 = (uint)((sbyte)(r1 >> 3) + ((sbyte)((blockLow & 0x000007) << 5) >> 5)); - uint g2 = (uint)((sbyte)(g1 >> 3) + ((sbyte)((blockLow & 0x000700) >> 3) >> 5)); - uint b2 = (uint)((sbyte)(b1 >> 3) + ((sbyte)((blockLow & 0x070000) >> 11) >> 5)); - - return (r1, g1, b1, r2, g2, b2); - } - - private static void DecodeBlock59T(Span<uint> tile, uint blockLow, uint blockHigh, uint alphaMask = 0) - { - uint r1 = (blockLow & 3) | ((blockLow >> 1) & 0xc); - uint g1 = (blockLow >> 12) & 0xf; - uint b1 = (blockLow >> 8) & 0xf; - - uint r2 = (blockLow >> 20) & 0xf; - uint g2 = (blockLow >> 16) & 0xf; - uint b2 = (blockLow >> 28) & 0xf; - - r1 |= r1 << 4; - g1 |= g1 << 4; - b1 |= b1 << 4; - - r2 |= r2 << 4; - g2 |= g2 << 4; - b2 |= b2 << 4; - - int dist = _etc2Lut[((blockLow >> 24) & 1) | ((blockLow >> 25) & 6)]; - - Span<uint> palette = stackalloc uint[4]; - - palette[0] = Pack(r1, g1, b1); - palette[1] = Pack(r2, g2, b2, dist); - palette[2] = Pack(r2, g2, b2); - palette[3] = Pack(r2, g2, b2, -dist); - - blockHigh = BinaryPrimitives.ReverseEndianness(blockHigh); - - for (int y = 0; y < BlockHeight; y++) - { - for (int x = 0; x < BlockWidth; x++) - { - int offset = (y * 4) + x; - int index = (x * 4) + y; - - int paletteIndex = (int)((blockHigh >> index) & 1) | (int)((blockHigh >> (index + 15)) & 2); - - tile[offset] = palette[paletteIndex]; - - if (alphaMask != 0) - { - if (paletteIndex == 2) - { - tile[offset] = 0; - } - else - { - tile[offset] |= alphaMask; - } - } - } - } - } - - private static void DecodeBlock58H(Span<uint> tile, uint blockLow, uint blockHigh, uint alphaMask = 0) - { - uint r1 = (blockLow >> 3) & 0xf; - uint g1 = ((blockLow << 1) & 0xe) | ((blockLow >> 12) & 1); - uint b1 = ((blockLow >> 23) & 1) | ((blockLow >> 7) & 6) | ((blockLow >> 8) & 8); - - uint r2 = (blockLow >> 19) & 0xf; - uint g2 = ((blockLow >> 31) & 1) | ((blockLow >> 15) & 
0xe); - uint b2 = (blockLow >> 27) & 0xf; - - uint rgb1 = Pack4Be(r1, g1, b1); - uint rgb2 = Pack4Be(r2, g2, b2); - - r1 |= r1 << 4; - g1 |= g1 << 4; - b1 |= b1 << 4; - - r2 |= r2 << 4; - g2 |= g2 << 4; - b2 |= b2 << 4; - - int dist = _etc2Lut[(rgb1 >= rgb2 ? 1u : 0u) | ((blockLow >> 23) & 2) | ((blockLow >> 24) & 4)]; - - Span<uint> palette = stackalloc uint[4]; - - palette[0] = Pack(r1, g1, b1, dist); - palette[1] = Pack(r1, g1, b1, -dist); - palette[2] = Pack(r2, g2, b2, dist); - palette[3] = Pack(r2, g2, b2, -dist); - - blockHigh = BinaryPrimitives.ReverseEndianness(blockHigh); - - for (int y = 0; y < BlockHeight; y++) - { - for (int x = 0; x < BlockWidth; x++) - { - int offset = (y * 4) + x; - int index = (x * 4) + y; - - int paletteIndex = (int)((blockHigh >> index) & 1) | (int)((blockHigh >> (index + 15)) & 2); - - tile[offset] = palette[paletteIndex]; - - if (alphaMask != 0) - { - if (paletteIndex == 2) - { - tile[offset] = 0; - } - else - { - tile[offset] |= alphaMask; - } - } - } - } - } - - private static void DecodeBlock57P(Span<uint> tile, ulong block) - { - int r0 = (int)((block >> 1) & 0x3f); - int g0 = (int)(((block >> 9) & 0x3f) | ((block & 1) << 6)); - int b0 = (int)(((block >> 31) & 1) | ((block >> 15) & 6) | ((block >> 16) & 0x18) | ((block >> 3) & 0x20)); - - int rh = (int)(((block >> 24) & 1) | ((block >> 25) & 0x3e)); - int gh = (int)((block >> 33) & 0x7f); - int bh = (int)(((block >> 43) & 0x1f) | ((block >> 27) & 0x20)); - - int rv = (int)(((block >> 53) & 7) | ((block >> 37) & 0x38)); - int gv = (int)(((block >> 62) & 3) | ((block >> 46) & 0x7c)); - int bv = (int)((block >> 56) & 0x3f); - - r0 = (r0 << 2) | (r0 >> 4); - g0 = (g0 << 1) | (g0 >> 6); - b0 = (b0 << 2) | (b0 >> 4); - - rh = (rh << 2) | (rh >> 4); - gh = (gh << 1) | (gh >> 6); - bh = (bh << 2) | (bh >> 4); - - rv = (rv << 2) | (rv >> 4); - gv = (gv << 1) | (gv >> 6); - bv = (bv << 2) | (bv >> 4); - - for (int y = 0; y < BlockHeight; y++) - { - for (int x = 0; x < BlockWidth; 
x++) - { - int offset = y * BlockWidth + x; - - byte r = Saturate(((x * (rh - r0)) + (y * (rv - r0)) + (r0 * 4) + 2) >> 2); - byte g = Saturate(((x * (gh - g0)) + (y * (gv - g0)) + (g0 * 4) + 2) >> 2); - byte b = Saturate(((x * (bh - b0)) + (y * (bv - b0)) + (b0 * 4) + 2) >> 2); - - tile[offset] = Pack(r, g, b); - } - } - } - - private static void DecodeBlockETC1( - Span<uint> tile, - uint blockLow, - uint blockHigh, - uint r1, - uint g1, - uint b1, - uint r2, - uint g2, - uint b2, - uint alphaMask = 0) - { - int[] table1 = _etc1Lut[(blockLow >> 29) & 7]; - int[] table2 = _etc1Lut[(blockLow >> 26) & 7]; - - bool flip = (blockLow & 0x1000000) != 0; - - if (!flip) - { - for (int y = 0; y < BlockHeight; y++) - { - for (int x = 0; x < BlockWidth / 2; x++) - { - uint color1 = CalculatePixel(r1, g1, b1, x + 0, y, blockHigh, table1, alphaMask); - uint color2 = CalculatePixel(r2, g2, b2, x + 2, y, blockHigh, table2, alphaMask); - - int offset1 = y * BlockWidth + x; - int offset2 = y * BlockWidth + x + 2; - - tile[offset1] = color1; - tile[offset2] = color2; - } - } - } - else - { - for (int y = 0; y < BlockHeight / 2; y++) - { - for (int x = 0; x < BlockWidth; x++) - { - uint color1 = CalculatePixel(r1, g1, b1, x, y + 0, blockHigh, table1, alphaMask); - uint color2 = CalculatePixel(r2, g2, b2, x, y + 2, blockHigh, table2, alphaMask); - - int offset1 = (y * BlockWidth) + x; - int offset2 = ((y + 2) * BlockWidth) + x; - - tile[offset1] = color1; - tile[offset2] = color2; - } - } - } - } - - private static uint CalculatePixel(uint r, uint g, uint b, int x, int y, uint block, int[] table, uint alphaMask) - { - int index = x * BlockHeight + y; - uint msb = block << 1; - uint tableIndex = index < 8 - ? 
((block >> (index + 24)) & 1) + ((msb >> (index + 8)) & 2) - : ((block >> (index + 8)) & 1) + ((msb >> (index - 8)) & 2); - - if (alphaMask != 0) - { - if (tableIndex == 0) - { - return Pack(r, g, b) | alphaMask; - } - else if (tableIndex == 2) - { - return 0; - } - else - { - return Pack(r, g, b, table[tableIndex]) | alphaMask; - } - } - - return Pack(r, g, b, table[tableIndex]); - } - - private static uint Pack(uint r, uint g, uint b, int offset) - { - r = Saturate((int)(r + offset)); - g = Saturate((int)(g + offset)); - b = Saturate((int)(b + offset)); - - return Pack(r, g, b); - } - - private static uint Pack(uint r, uint g, uint b) - { - return r | (g << 8) | (b << 16); - } - - private static uint Pack4Be(uint r, uint g, uint b) - { - return (r << 8) | (g << 4) | b; - } - - private static byte Saturate(int value) - { - return value > byte.MaxValue ? byte.MaxValue : value < byte.MinValue ? byte.MinValue : (byte)value; - } - - private static int CalculateOutputSize(int width, int height, int depth, int levels, int layers) - { - int size = 0; - - for (int l = 0; l < levels; l++) - { - size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4; - } - - return size; - } - } -} diff --git a/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs b/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs deleted file mode 100644 index 35d36bce..00000000 --- a/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs +++ /dev/null @@ -1,1005 +0,0 @@ -using Ryujinx.Graphics.Texture.Utils; -using System; -using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using System.Threading.Tasks; - -namespace Ryujinx.Graphics.Texture.Encoders -{ - static class BC7Encoder - { - private const int MinColorVarianceForModeChange = 160; - - public static void Encode(Memory<byte> outputStorage, ReadOnlyMemory<byte> data, int 
width, int height, EncodeMode mode) - { - int widthInBlocks = (width + 3) / 4; - int heightInBlocks = (height + 3) / 4; - - bool fastMode = (mode & EncodeMode.ModeMask) == EncodeMode.Fast; - - if (mode.HasFlag(EncodeMode.Multithreaded)) - { - Parallel.For(0, heightInBlocks, (yInBlocks) => - { - Span<ulong> output = MemoryMarshal.Cast<byte, ulong>(outputStorage.Span); - int y = yInBlocks * 4; - - for (int xInBlocks = 0; xInBlocks < widthInBlocks; xInBlocks++) - { - int x = xInBlocks * 4; - Block block = CompressBlock(data.Span, x, y, width, height, fastMode); - - int offset = (yInBlocks * widthInBlocks + xInBlocks) * 2; - output[offset] = block.Low; - output[offset + 1] = block.High; - } - }); - } - else - { - Span<ulong> output = MemoryMarshal.Cast<byte, ulong>(outputStorage.Span); - int offset = 0; - - for (int y = 0; y < height; y += 4) - { - for (int x = 0; x < width; x += 4) - { - Block block = CompressBlock(data.Span, x, y, width, height, fastMode); - - output[offset++] = block.Low; - output[offset++] = block.High; - } - } - } - } - - private static readonly int[] _mostFrequentPartitions = new int[] - { - 0, 13, 2, 1, 15, 14, 10, 23 - }; - - private static Block CompressBlock(ReadOnlySpan<byte> data, int x, int y, int width, int height, bool fastMode) - { - int w = Math.Min(4, width - x); - int h = Math.Min(4, height - y); - - var dataUint = MemoryMarshal.Cast<byte, uint>(data); - - int baseOffset = y * width + x; - - Span<uint> tile = stackalloc uint[w * h]; - - for (int ty = 0; ty < h; ty++) - { - int rowOffset = baseOffset + ty * width; - - for (int tx = 0; tx < w; tx++) - { - tile[ty * w + tx] = dataUint[rowOffset + tx]; - } - } - - return fastMode ? 
EncodeFast(tile, w, h) : EncodeExhaustive(tile, w, h); - } - - private static Block EncodeFast(ReadOnlySpan<uint> tile, int w, int h) - { - (RgbaColor8 minColor, RgbaColor8 maxColor) = BC67Utils.GetMinMaxColors(tile, w, h); - - bool alphaNotOne = minColor.A != 255 || maxColor.A != 255; - int variance = BC67Utils.SquaredDifference(minColor.GetColor32(), maxColor.GetColor32()); - int selectedMode; - int indexMode = 0; - - if (alphaNotOne) - { - bool constantAlpha = minColor.A == maxColor.A; - if (constantAlpha) - { - selectedMode = variance > MinColorVarianceForModeChange ? 7 : 6; - } - else - { - if (variance > MinColorVarianceForModeChange) - { - Span<uint> uniqueRGB = stackalloc uint[16]; - Span<uint> uniqueAlpha = stackalloc uint[16]; - - int uniqueRGBCount = 0; - int uniqueAlphaCount = 0; - - uint rgbMask = new RgbaColor8(255, 255, 255, 0).ToUInt32(); - uint alphaMask = new RgbaColor8(0, 0, 0, 255).ToUInt32(); - - for (int i = 0; i < tile.Length; i++) - { - uint c = tile[i]; - - if (!uniqueRGB.Slice(0, uniqueRGBCount).Contains(c & rgbMask)) - { - uniqueRGB[uniqueRGBCount++] = c & rgbMask; - } - - if (!uniqueAlpha.Slice(0, uniqueAlphaCount).Contains(c & alphaMask)) - { - uniqueAlpha[uniqueAlphaCount++] = c & alphaMask; - } - } - - selectedMode = 4; - indexMode = uniqueRGBCount > uniqueAlphaCount ? 
1 : 0; - } - else - { - selectedMode = 5; - } - } - } - else - { - if (variance > MinColorVarianceForModeChange) - { - selectedMode = 1; - } - else - { - selectedMode = 6; - } - } - - int selectedPartition = 0; - - if (selectedMode == 1 || selectedMode == 7) - { - int partitionSelectionLowestError = int.MaxValue; - - for (int i = 0; i < _mostFrequentPartitions.Length; i++) - { - int p = _mostFrequentPartitions[i]; - int error = GetEndPointSelectionErrorFast(tile, 2, p, w, h, partitionSelectionLowestError); - if (error < partitionSelectionLowestError) - { - partitionSelectionLowestError = error; - selectedPartition = p; - } - } - } - - return Encode(selectedMode, selectedPartition, 0, indexMode, fastMode: true, tile, w, h, out _); - } - - private static Block EncodeExhaustive(ReadOnlySpan<uint> tile, int w, int h) - { - Block bestBlock = default; - int lowestError = int.MaxValue; - int lowestErrorSubsets = int.MaxValue; - - for (int m = 0; m < 8; m++) - { - for (int r = 0; r < (m == 4 || m == 5 ? 4 : 1); r++) - { - for (int im = 0; im < (m == 4 ? 
2 : 1); im++) - { - for (int p = 0; p < 1 << BC67Tables.BC7ModeInfos[m].PartitionBitCount; p++) - { - Block block = Encode(m, p, r, im, fastMode: false, tile, w, h, out int maxError); - if (maxError < lowestError || (maxError == lowestError && BC67Tables.BC7ModeInfos[m].SubsetCount < lowestErrorSubsets)) - { - lowestError = maxError; - lowestErrorSubsets = BC67Tables.BC7ModeInfos[m].SubsetCount; - bestBlock = block; - } - } - } - } - } - - return bestBlock; - } - - private static Block Encode( - int mode, - int partition, - int rotation, - int indexMode, - bool fastMode, - ReadOnlySpan<uint> tile, - int w, - int h, - out int errorSum) - { - BC7ModeInfo modeInfo = BC67Tables.BC7ModeInfos[mode]; - int subsetCount = modeInfo.SubsetCount; - int partitionBitCount = modeInfo.PartitionBitCount; - int rotationBitCount = modeInfo.RotationBitCount; - int indexModeBitCount = modeInfo.IndexModeBitCount; - int colorDepth = modeInfo.ColorDepth; - int alphaDepth = modeInfo.AlphaDepth; - int pBits = modeInfo.PBits; - int colorIndexBitCount = modeInfo.ColorIndexBitCount; - int alphaIndexBitCount = modeInfo.AlphaIndexBitCount; - bool separateAlphaIndices = alphaIndexBitCount != 0; - - uint alphaMask; - - if (separateAlphaIndices) - { - alphaMask = rotation switch - { - 1 => new RgbaColor8(255, 0, 0, 0).ToUInt32(), - 2 => new RgbaColor8(0, 255, 0, 0).ToUInt32(), - 3 => new RgbaColor8(0, 0, 255, 0).ToUInt32(), - _ => new RgbaColor8(0, 0, 0, 255).ToUInt32() - }; - } - else - { - alphaMask = new RgbaColor8(0, 0, 0, 0).ToUInt32(); - } - - if (indexMode != 0) - { - alphaMask = ~alphaMask; - } - - // - // Select color palette. 
- // - - Span<uint> endPoints0 = stackalloc uint[subsetCount]; - Span<uint> endPoints1 = stackalloc uint[subsetCount]; - - SelectEndPoints( - tile, - w, - h, - endPoints0, - endPoints1, - subsetCount, - partition, - colorIndexBitCount, - colorDepth, - alphaDepth, - ~alphaMask, - fastMode); - - if (separateAlphaIndices) - { - SelectEndPoints( - tile, - w, - h, - endPoints0, - endPoints1, - subsetCount, - partition, - alphaIndexBitCount, - colorDepth, - alphaDepth, - alphaMask, - fastMode); - } - - Span<int> pBitValues = stackalloc int[pBits]; - - for (int i = 0; i < pBits; i++) - { - int pBit; - - if (pBits == subsetCount) - { - pBit = GetPBit(endPoints0[i], endPoints1[i], colorDepth, alphaDepth); - } - else - { - int subset = i >> 1; - uint color = (i & 1) == 0 ? endPoints0[subset] : endPoints1[subset]; - pBit = GetPBit(color, colorDepth, alphaDepth); - } - - pBitValues[i] = pBit; - } - - int colorIndexCount = 1 << colorIndexBitCount; - int alphaIndexCount = 1 << alphaIndexBitCount; - - Span<byte> colorIndices = stackalloc byte[16]; - Span<byte> alphaIndices = stackalloc byte[16]; - - errorSum = BC67Utils.SelectIndices( - tile, - w, - h, - endPoints0, - endPoints1, - pBitValues, - colorIndices, - subsetCount, - partition, - colorIndexBitCount, - colorIndexCount, - colorDepth, - alphaDepth, - pBits, - alphaMask); - - if (separateAlphaIndices) - { - errorSum += BC67Utils.SelectIndices( - tile, - w, - h, - endPoints0, - endPoints1, - pBitValues, - alphaIndices, - subsetCount, - partition, - alphaIndexBitCount, - alphaIndexCount, - colorDepth, - alphaDepth, - pBits, - ~alphaMask); - } - - Span<bool> colorSwapSubset = stackalloc bool[3]; - - for (int i = 0; i < 3; i++) - { - colorSwapSubset[i] = colorIndices[BC67Tables.FixUpIndices[subsetCount - 1][partition][i]] >= (colorIndexCount >> 1); - } - - bool alphaSwapSubset = alphaIndices[0] >= (alphaIndexCount >> 1); - - Block block = new Block(); - - int offset = 0; - - block.Encode(1UL << mode, ref offset, mode + 1); - 
block.Encode((ulong)partition, ref offset, partitionBitCount); - block.Encode((ulong)rotation, ref offset, rotationBitCount); - block.Encode((ulong)indexMode, ref offset, indexModeBitCount); - - for (int comp = 0; comp < 3; comp++) - { - int rotatedComp = comp; - - if (((comp + 1) & 3) == rotation) - { - rotatedComp = 3; - } - - for (int subset = 0; subset < subsetCount; subset++) - { - RgbaColor8 color0 = RgbaColor8.FromUInt32(endPoints0[subset]); - RgbaColor8 color1 = RgbaColor8.FromUInt32(endPoints1[subset]); - - int pBit0 = -1, pBit1 = -1; - - if (pBits == subsetCount) - { - pBit0 = pBit1 = pBitValues[subset]; - } - else if (pBits != 0) - { - pBit0 = pBitValues[subset * 2]; - pBit1 = pBitValues[subset * 2 + 1]; - } - - if (indexMode == 0 ? colorSwapSubset[subset] : alphaSwapSubset) - { - block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), colorDepth, pBit1), ref offset, colorDepth); - block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), colorDepth, pBit0), ref offset, colorDepth); - } - else - { - block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), colorDepth, pBit0), ref offset, colorDepth); - block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), colorDepth, pBit1), ref offset, colorDepth); - } - } - } - - if (alphaDepth != 0) - { - int rotatedComp = (rotation - 1) & 3; - - for (int subset = 0; subset < subsetCount; subset++) - { - RgbaColor8 color0 = RgbaColor8.FromUInt32(endPoints0[subset]); - RgbaColor8 color1 = RgbaColor8.FromUInt32(endPoints1[subset]); - - int pBit0 = -1, pBit1 = -1; - - if (pBits == subsetCount) - { - pBit0 = pBit1 = pBitValues[subset]; - } - else if (pBits != 0) - { - pBit0 = pBitValues[subset * 2]; - pBit1 = pBitValues[subset * 2 + 1]; - } - - if (separateAlphaIndices && indexMode == 0 ? 
alphaSwapSubset : colorSwapSubset[subset]) - { - block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), alphaDepth, pBit1), ref offset, alphaDepth); - block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), alphaDepth, pBit0), ref offset, alphaDepth); - } - else - { - block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), alphaDepth, pBit0), ref offset, alphaDepth); - block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), alphaDepth, pBit1), ref offset, alphaDepth); - } - } - } - - for (int i = 0; i < pBits; i++) - { - block.Encode((ulong)pBitValues[i], ref offset, 1); - } - - byte[] fixUpTable = BC67Tables.FixUpIndices[subsetCount - 1][partition]; - - for (int i = 0; i < 16; i++) - { - int subset = BC67Tables.PartitionTable[subsetCount - 1][partition][i]; - byte index = colorIndices[i]; - - if (colorSwapSubset[subset]) - { - index = (byte)(index ^ (colorIndexCount - 1)); - } - - int finalIndexBitCount = i == fixUpTable[subset] ? colorIndexBitCount - 1 : colorIndexBitCount; - - Debug.Assert(index < (1 << finalIndexBitCount)); - - block.Encode(index, ref offset, finalIndexBitCount); - } - - if (separateAlphaIndices) - { - for (int i = 0; i < 16; i++) - { - byte index = alphaIndices[i]; - - if (alphaSwapSubset) - { - index = (byte)(index ^ (alphaIndexCount - 1)); - } - - int finalIndexBitCount = i == 0 ? 
alphaIndexBitCount - 1 : alphaIndexBitCount; - - Debug.Assert(index < (1 << finalIndexBitCount)); - - block.Encode(index, ref offset, finalIndexBitCount); - } - } - - return block; - } - - private static unsafe int GetEndPointSelectionErrorFast(ReadOnlySpan<uint> tile, int subsetCount, int partition, int w, int h, int maxError) - { - byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; - - Span<RgbaColor8> minColors = stackalloc RgbaColor8[subsetCount]; - Span<RgbaColor8> maxColors = stackalloc RgbaColor8[subsetCount]; - - BC67Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); - - Span<uint> endPoints0 = stackalloc uint[subsetCount]; - Span<uint> endPoints1 = stackalloc uint[subsetCount]; - - SelectEndPointsFast(partitionTable, tile, w, h, subsetCount, minColors, maxColors, endPoints0, endPoints1, uint.MaxValue); - - Span<RgbaColor32> palette = stackalloc RgbaColor32[8]; - - int errorSum = 0; - - for (int subset = 0; subset < subsetCount; subset++) - { - RgbaColor32 blockDir = maxColors[subset].GetColor32() - minColors[subset].GetColor32(); - int sum = blockDir.R + blockDir.G + blockDir.B + blockDir.A; - if (sum != 0) - { - blockDir = (blockDir << 6) / new RgbaColor32(sum); - } - - uint c0 = endPoints0[subset]; - uint c1 = endPoints1[subset]; - - int pBit0 = GetPBit(c0, 6, 0); - int pBit1 = GetPBit(c1, 6, 0); - - c0 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c0), 6, 0, pBit0).ToUInt32(); - c1 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c1), 6, 0, pBit1).ToUInt32(); - - if (Sse41.IsSupported) - { - Vector128<byte> c0Rep = Vector128.Create(c0).AsByte(); - Vector128<byte> c1Rep = Vector128.Create(c1).AsByte(); - - Vector128<byte> c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); - - Vector128<byte> rWeights; - Vector128<byte> lWeights; - - fixed (byte* pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1]) - { - rWeights = Sse2.LoadScalarVector128((ulong*)pWeights).AsByte(); - lWeights = 
Sse2.LoadScalarVector128((ulong*)pInvWeights).AsByte(); - } - - Vector128<byte> iWeights = Sse2.UnpackLow(rWeights, lWeights); - Vector128<byte> iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); - Vector128<byte> iWeights23 = Sse2.UnpackHigh(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); - Vector128<byte> iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); - Vector128<byte> iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); - Vector128<byte> iWeights2 = Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); - Vector128<byte> iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); - - static Vector128<short> ShiftRoundToNearest(Vector128<short> x) - { - return Sse2.ShiftRightLogical(Sse2.Add(x, Vector128.Create((short)32)), 6); - } - - Vector128<short> pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); - Vector128<short> pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); - Vector128<short> pal2 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights2.AsSByte())); - Vector128<short> pal3 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights3.AsSByte())); - - for (int i = 0; i < tile.Length; i++) - { - if (partitionTable[i] != subset) - { - continue; - } - - uint c = tile[i]; - - Vector128<short> color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); - - Vector128<short> delta0 = Sse2.Subtract(color, pal0); - Vector128<short> delta1 = Sse2.Subtract(color, pal1); - Vector128<short> delta2 = Sse2.Subtract(color, pal2); - Vector128<short> delta3 = Sse2.Subtract(color, pal3); - - Vector128<int> deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); - Vector128<int> deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); - Vector128<int> deltaSum2 = Sse2.MultiplyAddAdjacent(delta2, delta2); - Vector128<int> deltaSum3 = Sse2.MultiplyAddAdjacent(delta3, 
delta3); - - Vector128<int> deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); - Vector128<int> deltaSum23 = Ssse3.HorizontalAdd(deltaSum2, deltaSum3); - - Vector128<ushort> delta = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum23); - - Vector128<ushort> min = Sse41.MinHorizontal(delta); - - errorSum += min.GetElement(0); - } - } - else - { - RgbaColor32 e032 = RgbaColor8.FromUInt32(c0).GetColor32(); - RgbaColor32 e132 = RgbaColor8.FromUInt32(c1).GetColor32(); - - palette[0] = e032; - palette[palette.Length - 1] = e132; - - for (int i = 1; i < palette.Length - 1; i++) - { - palette[i] = BC67Utils.Interpolate(e032, e132, i, 3); - } - - for (int i = 0; i < tile.Length; i++) - { - if (partitionTable[i] != subset) - { - continue; - } - - uint c = tile[i]; - RgbaColor32 color = Unsafe.As<uint, RgbaColor8>(ref c).GetColor32(); - - int bestMatchScore = int.MaxValue; - - for (int j = 0; j < palette.Length; j++) - { - int score = BC67Utils.SquaredDifference(color, palette[j]); - - if (score < bestMatchScore) - { - bestMatchScore = score; - } - } - - errorSum += bestMatchScore; - } - } - - // No point in continuing if we are already above maximum. 
- if (errorSum >= maxError) - { - return int.MaxValue; - } - } - - return errorSum; - } - - private static void SelectEndPoints( - ReadOnlySpan<uint> tile, - int w, - int h, - Span<uint> endPoints0, - Span<uint> endPoints1, - int subsetCount, - int partition, - int indexBitCount, - int colorDepth, - int alphaDepth, - uint writeMask, - bool fastMode) - { - byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; - - Span<RgbaColor8> minColors = stackalloc RgbaColor8[subsetCount]; - Span<RgbaColor8> maxColors = stackalloc RgbaColor8[subsetCount]; - - BC67Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); - - uint inverseMask = ~writeMask; - - for (int i = 0; i < subsetCount; i++) - { - Unsafe.As<RgbaColor8, uint>(ref minColors[i]) |= inverseMask; - Unsafe.As<RgbaColor8, uint>(ref maxColors[i]) |= inverseMask; - } - - if (fastMode) - { - SelectEndPointsFast(partitionTable, tile, w, h, subsetCount, minColors, maxColors, endPoints0, endPoints1, writeMask); - } - else - { - Span<RgbaColor8> colors = stackalloc RgbaColor8[subsetCount * 16]; - Span<byte> counts = stackalloc byte[subsetCount]; - - int i = 0; - for (int ty = 0; ty < h; ty++) - { - for (int tx = 0; tx < w; tx++) - { - int subset = partitionTable[ty * 4 + tx]; - RgbaColor8 color = RgbaColor8.FromUInt32(tile[i++] | inverseMask); - - static void AddIfNew(Span<RgbaColor8> values, RgbaColor8 value, int subset, ref byte count) - { - for (int i = 0; i < count; i++) - { - if (values[subset * 16 + i] == value) - { - return; - } - } - - values[subset * 16 + count++] = value; - } - - AddIfNew(colors, color, subset, ref counts[subset]); - } - } - - for (int subset = 0; subset < subsetCount; subset++) - { - int offset = subset * 16; - - RgbaColor8 minColor = minColors[subset]; - RgbaColor8 maxColor = maxColors[subset]; - - ReadOnlySpan<RgbaColor8> subsetColors = colors.Slice(offset, counts[subset]); - - (RgbaColor8 e0, RgbaColor8 e1) = SelectEndPoints(subsetColors, 
minColor, maxColor, indexBitCount, colorDepth, alphaDepth, inverseMask); - - endPoints0[subset] = (endPoints0[subset] & inverseMask) | (e0.ToUInt32() & writeMask); - endPoints1[subset] = (endPoints1[subset] & inverseMask) | (e1.ToUInt32() & writeMask); - } - } - } - - private static unsafe void SelectEndPointsFast( - ReadOnlySpan<byte> partitionTable, - ReadOnlySpan<uint> tile, - int w, - int h, - int subsetCount, - ReadOnlySpan<RgbaColor8> minColors, - ReadOnlySpan<RgbaColor8> maxColors, - Span<uint> endPoints0, - Span<uint> endPoints1, - uint writeMask) - { - uint inverseMask = ~writeMask; - - if (Sse41.IsSupported && w == 4 && h == 4) - { - Vector128<byte> row0, row1, row2, row3; - Vector128<short> ones = Vector128<short>.AllBitsSet; - - fixed (uint* pTile = tile) - { - row0 = Sse2.LoadVector128(pTile).AsByte(); - row1 = Sse2.LoadVector128(pTile + 4).AsByte(); - row2 = Sse2.LoadVector128(pTile + 8).AsByte(); - row3 = Sse2.LoadVector128(pTile + 12).AsByte(); - } - - Vector128<byte> partitionMask; - - fixed (byte* pPartitionTable = partitionTable) - { - partitionMask = Sse2.LoadVector128(pPartitionTable); - } - - for (int subset = 0; subset < subsetCount; subset++) - { - RgbaColor32 blockDir = maxColors[subset].GetColor32() - minColors[subset].GetColor32(); - int sum = blockDir.R + blockDir.G + blockDir.B + blockDir.A; - if (sum != 0) - { - blockDir = (blockDir << 6) / new RgbaColor32(sum); - } - - Vector128<byte> bd = Vector128.Create(blockDir.GetColor8().ToUInt32()).AsByte(); - - Vector128<short> delta0 = Ssse3.MultiplyAddAdjacent(row0, bd.AsSByte()); - Vector128<short> delta1 = Ssse3.MultiplyAddAdjacent(row1, bd.AsSByte()); - Vector128<short> delta2 = Ssse3.MultiplyAddAdjacent(row2, bd.AsSByte()); - Vector128<short> delta3 = Ssse3.MultiplyAddAdjacent(row3, bd.AsSByte()); - - Vector128<short> delta01 = Ssse3.HorizontalAdd(delta0, delta1); - Vector128<short> delta23 = Ssse3.HorizontalAdd(delta2, delta3); - - Vector128<byte> subsetMask = 
Sse2.Xor(Sse2.CompareEqual(partitionMask, Vector128.Create((byte)subset)), ones.AsByte()); - - Vector128<short> subsetMask01 = Sse2.UnpackLow(subsetMask, subsetMask).AsInt16(); - Vector128<short> subsetMask23 = Sse2.UnpackHigh(subsetMask, subsetMask).AsInt16(); - - Vector128<ushort> min01 = Sse41.MinHorizontal(Sse2.Or(delta01, subsetMask01).AsUInt16()); - Vector128<ushort> min23 = Sse41.MinHorizontal(Sse2.Or(delta23, subsetMask23).AsUInt16()); - Vector128<ushort> max01 = Sse41.MinHorizontal(Sse2.Xor(Sse2.AndNot(subsetMask01, delta01), ones).AsUInt16()); - Vector128<ushort> max23 = Sse41.MinHorizontal(Sse2.Xor(Sse2.AndNot(subsetMask23, delta23), ones).AsUInt16()); - - uint minPos01 = min01.AsUInt32().GetElement(0); - uint minPos23 = min23.AsUInt32().GetElement(0); - uint maxPos01 = max01.AsUInt32().GetElement(0); - uint maxPos23 = max23.AsUInt32().GetElement(0); - - uint minDistColor = (ushort)minPos23 < (ushort)minPos01 - ? tile[(int)(minPos23 >> 16) + 8] - : tile[(int)(minPos01 >> 16)]; - - // Note that we calculate the maximum as the minimum of the inverse, so less here is actually greater. - uint maxDistColor = (ushort)maxPos23 < (ushort)maxPos01 - ? 
tile[(int)(maxPos23 >> 16) + 8] - : tile[(int)(maxPos01 >> 16)]; - - endPoints0[subset] = (endPoints0[subset] & inverseMask) | (minDistColor & writeMask); - endPoints1[subset] = (endPoints1[subset] & inverseMask) | (maxDistColor & writeMask); - } - } - else - { - for (int subset = 0; subset < subsetCount; subset++) - { - RgbaColor32 blockDir = maxColors[subset].GetColor32() - minColors[subset].GetColor32(); - blockDir = RgbaColor32.DivideGuarded(blockDir << 6, new RgbaColor32(blockDir.R + blockDir.G + blockDir.B + blockDir.A), 0); - - int minDist = int.MaxValue; - int maxDist = int.MinValue; - - RgbaColor8 minDistColor = default; - RgbaColor8 maxDistColor = default; - - int i = 0; - for (int ty = 0; ty < h; ty++) - { - for (int tx = 0; tx < w; tx++, i++) - { - if (partitionTable[ty * 4 + tx] != subset) - { - continue; - } - - RgbaColor8 color = RgbaColor8.FromUInt32(tile[i]); - int dist = RgbaColor32.Dot(color.GetColor32(), blockDir); - - if (minDist > dist) - { - minDist = dist; - minDistColor = color; - } - - if (maxDist < dist) - { - maxDist = dist; - maxDistColor = color; - } - } - } - - endPoints0[subset] = (endPoints0[subset] & inverseMask) | (minDistColor.ToUInt32() & writeMask); - endPoints1[subset] = (endPoints1[subset] & inverseMask) | (maxDistColor.ToUInt32() & writeMask); - } - } - } - - private static (RgbaColor8, RgbaColor8) SelectEndPoints( - ReadOnlySpan<RgbaColor8> values, - RgbaColor8 minValue, - RgbaColor8 maxValue, - int indexBitCount, - int colorDepth, - int alphaDepth, - uint alphaMask) - { - int n = values.Length; - int numInterpolatedColors = 1 << indexBitCount; - int numInterpolatedColorsMinus1 = numInterpolatedColors - 1; - - if (n == 0) - { - return (default, default); - } - - minValue = BC67Utils.Quantize(minValue, colorDepth, alphaDepth); - maxValue = BC67Utils.Quantize(maxValue, colorDepth, alphaDepth); - - RgbaColor32 blockDir = maxValue.GetColor32() - minValue.GetColor32(); - blockDir = RgbaColor32.DivideGuarded(blockDir << 6, new 
RgbaColor32(blockDir.R + blockDir.G + blockDir.B + blockDir.A), 0); - - int minDist = int.MaxValue; - int maxDist = 0; - - for (int i = 0; i < values.Length; i++) - { - RgbaColor8 color = values[i]; - int dist = RgbaColor32.Dot(BC67Utils.Quantize(color, colorDepth, alphaDepth).GetColor32(), blockDir); - - if (minDist >= dist) - { - minDist = dist; - } - - if (maxDist <= dist) - { - maxDist = dist; - } - } - - Span<RgbaColor8> palette = stackalloc RgbaColor8[numInterpolatedColors]; - - int distRange = Math.Max(1, maxDist - minDist); - - RgbaColor32 nV = new RgbaColor32(n); - - int bestErrorSum = int.MaxValue; - RgbaColor8 bestE0 = default; - RgbaColor8 bestE1 = default; - - Span<int> indices = stackalloc int[n]; - Span<RgbaColor32> colors = stackalloc RgbaColor32[n]; - - for (int maxIndex = numInterpolatedColorsMinus1; maxIndex >= 1; maxIndex--) - { - int sumX = 0; - int sumXX = 0; - int sumXXIncrement = 0; - - for (int i = 0; i < values.Length; i++) - { - RgbaColor32 color = values[i].GetColor32(); - - int dist = RgbaColor32.Dot(color, blockDir); - - int normalizedValue = ((dist - minDist) << 6) / distRange; - int texelIndex = (normalizedValue * maxIndex + 32) >> 6; - - indices[i] = texelIndex; - colors[i] = color; - - sumX += texelIndex; - sumXX += texelIndex * texelIndex; - sumXXIncrement += 1 + texelIndex * 2; - } - - for (int start = 0; start < numInterpolatedColors - maxIndex; start++) - { - RgbaColor32 sumY = new RgbaColor32(0); - RgbaColor32 sumXY = new RgbaColor32(0); - - for (int i = 0; i < indices.Length; i++) - { - RgbaColor32 y = colors[i]; - - sumY += y; - sumXY += new RgbaColor32(start + indices[i]) * y; - } - - RgbaColor32 sumXV = new RgbaColor32(sumX); - RgbaColor32 sumXXV = new RgbaColor32(sumXX); - RgbaColor32 m = RgbaColor32.DivideGuarded((nV * sumXY - sumXV * sumY) << 6, nV * sumXXV - sumXV * sumXV, 0); - RgbaColor32 b = ((sumY << 6) - m * sumXV) / nV; - - RgbaColor8 candidateE0 = (b >> 6).GetColor8(); - RgbaColor8 candidateE1 = ((b + m * new 
RgbaColor32(numInterpolatedColorsMinus1)) >> 6).GetColor8(); - - int pBit0 = GetPBit(candidateE0.ToUInt32(), colorDepth, alphaDepth); - int pBit1 = GetPBit(candidateE1.ToUInt32(), colorDepth, alphaDepth); - - int errorSum = BC67Utils.SelectIndices( - MemoryMarshal.Cast<RgbaColor8, uint>(values), - candidateE0.ToUInt32(), - candidateE1.ToUInt32(), - pBit0, - pBit1, - indexBitCount, - numInterpolatedColors, - colorDepth, - alphaDepth, - alphaMask); - - if (errorSum <= bestErrorSum) - { - bestErrorSum = errorSum; - bestE0 = candidateE0; - bestE1 = candidateE1; - } - - sumX += n; - sumXX += sumXXIncrement; - sumXXIncrement += 2 * n; - } - } - - return (bestE0, bestE1); - } - - private static int GetPBit(uint color, int colorDepth, int alphaDepth) - { - uint mask = 0x808080u >> colorDepth; - - if (alphaDepth != 0) - { - // If alpha is 0, let's assume the color information is not too important and prefer - // to preserve alpha instead. - if ((color >> 24) == 0) - { - return 0; - } - - mask |= 0x80000000u >> alphaDepth; - } - - color &= 0x7f7f7f7fu; - color += mask >> 1; - - int onesCount = BitOperations.PopCount(color & mask); - return onesCount >= 2 ? 1 : 0; - } - - private static int GetPBit(uint c0, uint c1, int colorDepth, int alphaDepth) - { - // Giving preference to the first endpoint yields better results, - // might be a side effect of the endpoint selection algorithm? 
- return GetPBit(c0, colorDepth, alphaDepth); - } - } -} diff --git a/Ryujinx.Graphics.Texture/Encoders/EncodeMode.cs b/Ryujinx.Graphics.Texture/Encoders/EncodeMode.cs deleted file mode 100644 index 5734d301..00000000 --- a/Ryujinx.Graphics.Texture/Encoders/EncodeMode.cs +++ /dev/null @@ -1,10 +0,0 @@ -namespace Ryujinx.Graphics.Texture.Encoders -{ - enum EncodeMode - { - Fast, - Exhaustive, - ModeMask = 0xff, - Multithreaded = 1 << 8 - } -} diff --git a/Ryujinx.Graphics.Texture/LayoutConverter.cs b/Ryujinx.Graphics.Texture/LayoutConverter.cs deleted file mode 100644 index 09eaf300..00000000 --- a/Ryujinx.Graphics.Texture/LayoutConverter.cs +++ /dev/null @@ -1,591 +0,0 @@ -using Ryujinx.Common; -using System; -using System.Runtime.Intrinsics; -using static Ryujinx.Graphics.Texture.BlockLinearConstants; - -namespace Ryujinx.Graphics.Texture -{ - public static class LayoutConverter - { - public const int HostStrideAlignment = 4; - - public static void ConvertBlockLinearToLinear( - Span<byte> dst, - int width, - int height, - int stride, - int bytesPerPixel, - int gobBlocksInY, - ReadOnlySpan<byte> data) - { - int gobHeight = gobBlocksInY * GobHeight; - - int strideTrunc = BitUtils.AlignDown(width * bytesPerPixel, 16); - int strideTrunc64 = BitUtils.AlignDown(width * bytesPerPixel, 64); - - int xStart = strideTrunc / bytesPerPixel; - - int outStrideGap = stride - width * bytesPerPixel; - - int alignment = GobStride / bytesPerPixel; - - int wAligned = BitUtils.AlignUp(width, alignment); - - BlockLinearLayout layoutConverter = new BlockLinearLayout(wAligned, height, gobBlocksInY, 1, bytesPerPixel); - - unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged - { - fixed (byte* outputPtr = output, dataPtr = data) - { - byte* outPtr = outputPtr; - - for (int y = 0; y < height; y++) - { - layoutConverter.SetY(y); - - for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64) - { - byte* offset = dataPtr + 
layoutConverter.GetOffsetWithLineOffset64(x); - byte* offset2 = offset + 0x20; - byte* offset3 = offset + 0x100; - byte* offset4 = offset + 0x120; - - Vector128<byte> value = *(Vector128<byte>*)offset; - Vector128<byte> value2 = *(Vector128<byte>*)offset2; - Vector128<byte> value3 = *(Vector128<byte>*)offset3; - Vector128<byte> value4 = *(Vector128<byte>*)offset4; - - *(Vector128<byte>*)outPtr = value; - *(Vector128<byte>*)(outPtr + 16) = value2; - *(Vector128<byte>*)(outPtr + 32) = value3; - *(Vector128<byte>*)(outPtr + 48) = value4; - } - - for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16) - { - byte* offset = dataPtr + layoutConverter.GetOffsetWithLineOffset16(x); - - *(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset; - } - - for (int x = xStart; x < width; x++, outPtr += bytesPerPixel) - { - byte* offset = dataPtr + layoutConverter.GetOffset(x); - - *(T*)outPtr = *(T*)offset; - } - - outPtr += outStrideGap; - } - } - return true; - } - - bool _ = bytesPerPixel switch - { - 1 => Convert<byte>(dst, data), - 2 => Convert<ushort>(dst, data), - 4 => Convert<uint>(dst, data), - 8 => Convert<ulong>(dst, data), - 12 => Convert<Bpp12Pixel>(dst, data), - 16 => Convert<Vector128<byte>>(dst, data), - _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.") - }; - } - - public static byte[] ConvertBlockLinearToLinear( - int width, - int height, - int depth, - int sliceDepth, - int levels, - int layers, - int blockWidth, - int blockHeight, - int bytesPerPixel, - int gobBlocksInY, - int gobBlocksInZ, - int gobBlocksInTileX, - SizeInfo sizeInfo, - ReadOnlySpan<byte> data) - { - int outSize = GetTextureSize( - width, - height, - sliceDepth, - levels, - layers, - blockWidth, - blockHeight, - bytesPerPixel); - - byte[] output = new byte[outSize]; - - int outOffs = 0; - - int mipGobBlocksInY = gobBlocksInY; - int mipGobBlocksInZ = gobBlocksInZ; - - int gobWidth = (GobStride / bytesPerPixel) * gobBlocksInTileX; - int 
gobHeight = gobBlocksInY * GobHeight; - - for (int level = 0; level < levels; level++) - { - int w = Math.Max(1, width >> level); - int h = Math.Max(1, height >> level); - int d = Math.Max(1, depth >> level); - - w = BitUtils.DivRoundUp(w, blockWidth); - h = BitUtils.DivRoundUp(h, blockHeight); - - while (h <= (mipGobBlocksInY >> 1) * GobHeight && mipGobBlocksInY != 1) - { - mipGobBlocksInY >>= 1; - } - - while (d <= (mipGobBlocksInZ >> 1) && mipGobBlocksInZ != 1) - { - mipGobBlocksInZ >>= 1; - } - - int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16); - int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64); - - int xStart = strideTrunc / bytesPerPixel; - - int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); - - int outStrideGap = stride - w * bytesPerPixel; - - int alignment = gobWidth; - - if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight) - { - alignment = GobStride / bytesPerPixel; - } - - int wAligned = BitUtils.AlignUp(w, alignment); - - BlockLinearLayout layoutConverter = new BlockLinearLayout( - wAligned, - h, - mipGobBlocksInY, - mipGobBlocksInZ, - bytesPerPixel); - - int sd = Math.Max(1, sliceDepth >> level); - - unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged - { - fixed (byte* outputPtr = output, dataPtr = data) - { - byte* outPtr = outputPtr + outOffs; - for (int layer = 0; layer < layers; layer++) - { - byte* inBaseOffset = dataPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level)); - - for (int z = 0; z < sd; z++) - { - layoutConverter.SetZ(z); - for (int y = 0; y < h; y++) - { - layoutConverter.SetY(y); - - for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64) - { - byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x); - byte* offset2 = offset + 0x20; - byte* offset3 = offset + 0x100; - byte* offset4 = offset + 0x120; - - Vector128<byte> value = *(Vector128<byte>*)offset; - Vector128<byte> value2 = *(Vector128<byte>*)offset2; - 
Vector128<byte> value3 = *(Vector128<byte>*)offset3; - Vector128<byte> value4 = *(Vector128<byte>*)offset4; - - *(Vector128<byte>*)outPtr = value; - *(Vector128<byte>*)(outPtr + 16) = value2; - *(Vector128<byte>*)(outPtr + 32) = value3; - *(Vector128<byte>*)(outPtr + 48) = value4; - } - - for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16) - { - byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x); - - *(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset; - } - - for (int x = xStart; x < w; x++, outPtr += bytesPerPixel) - { - byte* offset = inBaseOffset + layoutConverter.GetOffset(x); - - *(T*)outPtr = *(T*)offset; - } - - outPtr += outStrideGap; - } - } - } - outOffs += stride * h * d * layers; - } - return true; - } - - bool _ = bytesPerPixel switch - { - 1 => Convert<byte>(output, data), - 2 => Convert<ushort>(output, data), - 4 => Convert<uint>(output, data), - 8 => Convert<ulong>(output, data), - 12 => Convert<Bpp12Pixel>(output, data), - 16 => Convert<Vector128<byte>>(output, data), - _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.") - }; - } - return output; - } - - public static byte[] ConvertLinearStridedToLinear( - int width, - int height, - int blockWidth, - int blockHeight, - int lineSize, - int stride, - int bytesPerPixel, - ReadOnlySpan<byte> data) - { - int w = BitUtils.DivRoundUp(width, blockWidth); - int h = BitUtils.DivRoundUp(height, blockHeight); - - int outStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); - lineSize = Math.Min(lineSize, outStride); - - byte[] output = new byte[h * outStride]; - Span<byte> outSpan = output; - - int outOffs = 0; - int inOffs = 0; - - for (int y = 0; y < h; y++) - { - data.Slice(inOffs, lineSize).CopyTo(outSpan.Slice(outOffs, lineSize)); - - inOffs += stride; - outOffs += outStride; - } - - return output; - } - - public static void ConvertLinearToBlockLinear( - Span<byte> dst, - int width, - int height, - int 
stride, - int bytesPerPixel, - int gobBlocksInY, - ReadOnlySpan<byte> data) - { - int gobHeight = gobBlocksInY * GobHeight; - - int strideTrunc = BitUtils.AlignDown(width * bytesPerPixel, 16); - int strideTrunc64 = BitUtils.AlignDown(width * bytesPerPixel, 64); - - int xStart = strideTrunc / bytesPerPixel; - - int inStrideGap = stride - width * bytesPerPixel; - - int alignment = GobStride / bytesPerPixel; - - int wAligned = BitUtils.AlignUp(width, alignment); - - BlockLinearLayout layoutConverter = new BlockLinearLayout(wAligned, height, gobBlocksInY, 1, bytesPerPixel); - - unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged - { - fixed (byte* outputPtr = output, dataPtr = data) - { - byte* inPtr = dataPtr; - - for (int y = 0; y < height; y++) - { - layoutConverter.SetY(y); - - for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64) - { - byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset64(x); - byte* offset2 = offset + 0x20; - byte* offset3 = offset + 0x100; - byte* offset4 = offset + 0x120; - - Vector128<byte> value = *(Vector128<byte>*)inPtr; - Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16); - Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32); - Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48); - - *(Vector128<byte>*)offset = value; - *(Vector128<byte>*)offset2 = value2; - *(Vector128<byte>*)offset3 = value3; - *(Vector128<byte>*)offset4 = value4; - } - - for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16) - { - byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset16(x); - - *(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr; - } - - for (int x = xStart; x < width; x++, inPtr += bytesPerPixel) - { - byte* offset = outputPtr + layoutConverter.GetOffset(x); - - *(T*)offset = *(T*)inPtr; - } - - inPtr += inStrideGap; - } - } - return true; - } - - bool _ = bytesPerPixel switch - { - 1 => Convert<byte>(dst, data), - 2 => Convert<ushort>(dst, data), - 4 => 
Convert<uint>(dst, data), - 8 => Convert<ulong>(dst, data), - 12 => Convert<Bpp12Pixel>(dst, data), - 16 => Convert<Vector128<byte>>(dst, data), - _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.") - }; - } - - public static ReadOnlySpan<byte> ConvertLinearToBlockLinear( - Span<byte> output, - int width, - int height, - int depth, - int sliceDepth, - int levels, - int layers, - int blockWidth, - int blockHeight, - int bytesPerPixel, - int gobBlocksInY, - int gobBlocksInZ, - int gobBlocksInTileX, - SizeInfo sizeInfo, - ReadOnlySpan<byte> data) - { - if (output.Length == 0) - { - output = new byte[sizeInfo.TotalSize]; - } - - int inOffs = 0; - - int mipGobBlocksInY = gobBlocksInY; - int mipGobBlocksInZ = gobBlocksInZ; - - int gobWidth = (GobStride / bytesPerPixel) * gobBlocksInTileX; - int gobHeight = gobBlocksInY * GobHeight; - - for (int level = 0; level < levels; level++) - { - int w = Math.Max(1, width >> level); - int h = Math.Max(1, height >> level); - int d = Math.Max(1, depth >> level); - - w = BitUtils.DivRoundUp(w, blockWidth); - h = BitUtils.DivRoundUp(h, blockHeight); - - while (h <= (mipGobBlocksInY >> 1) * GobHeight && mipGobBlocksInY != 1) - { - mipGobBlocksInY >>= 1; - } - - while (d <= (mipGobBlocksInZ >> 1) && mipGobBlocksInZ != 1) - { - mipGobBlocksInZ >>= 1; - } - - int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16); - int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64); - - int xStart = strideTrunc / bytesPerPixel; - - int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); - - int inStrideGap = stride - w * bytesPerPixel; - - int alignment = gobWidth; - - if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight) - { - alignment = GobStride / bytesPerPixel; - } - - int wAligned = BitUtils.AlignUp(w, alignment); - - BlockLinearLayout layoutConverter = new BlockLinearLayout( - wAligned, - h, - mipGobBlocksInY, - mipGobBlocksInZ, - bytesPerPixel); - - int sd = Math.Max(1, 
sliceDepth >> level); - - unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged - { - fixed (byte* outputPtr = output, dataPtr = data) - { - byte* inPtr = dataPtr + inOffs; - for (int layer = 0; layer < layers; layer++) - { - byte* outBaseOffset = outputPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level)); - - for (int z = 0; z < sd; z++) - { - layoutConverter.SetZ(z); - for (int y = 0; y < h; y++) - { - layoutConverter.SetY(y); - - for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64) - { - byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x); - byte* offset2 = offset + 0x20; - byte* offset3 = offset + 0x100; - byte* offset4 = offset + 0x120; - - Vector128<byte> value = *(Vector128<byte>*)inPtr; - Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16); - Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32); - Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48); - - *(Vector128<byte>*)offset = value; - *(Vector128<byte>*)offset2 = value2; - *(Vector128<byte>*)offset3 = value3; - *(Vector128<byte>*)offset4 = value4; - } - - for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16) - { - byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x); - - *(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr; - } - - for (int x = xStart; x < w; x++, inPtr += bytesPerPixel) - { - byte* offset = outBaseOffset + layoutConverter.GetOffset(x); - - *(T*)offset = *(T*)inPtr; - } - - inPtr += inStrideGap; - } - } - } - inOffs += stride * h * d * layers; - } - return true; - } - - bool _ = bytesPerPixel switch - { - 1 => Convert<byte>(output, data), - 2 => Convert<ushort>(output, data), - 4 => Convert<uint>(output, data), - 8 => Convert<ulong>(output, data), - 12 => Convert<Bpp12Pixel>(output, data), - 16 => Convert<Vector128<byte>>(output, data), - _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.") - }; - } - - return output; - } - 
/// <summary>
/// Copies linear texture data whose rows are aligned to <c>HostStrideAlignment</c>
/// into a buffer whose rows are <paramref name="stride"/> bytes apart.
/// </summary>
/// <param name="output">Destination buffer. When empty, a new array of <c>h * stride</c> bytes is allocated</param>
/// <param name="width">Texture width, divided by <paramref name="blockWidth"/> (rounding up) to get the blocks per row</param>
/// <param name="height">Texture height, divided by <paramref name="blockHeight"/> (rounding up) to get the row count</param>
/// <param name="blockWidth">Width of one block</param>
/// <param name="blockHeight">Height of one block</param>
/// <param name="stride">Distance in bytes between two rows of the destination</param>
/// <param name="bytesPerPixel">Size in bytes of one block</param>
/// <param name="data">Source data</param>
/// <returns>
/// <paramref name="data"/> itself when the strides already match and no output buffer was supplied,
/// otherwise the filled output buffer
/// </returns>
public static ReadOnlySpan<byte> ConvertLinearToLinearStrided(
    Span<byte> output,
    int width,
    int height,
    int blockWidth,
    int blockHeight,
    int stride,
    int bytesPerPixel,
    ReadOnlySpan<byte> data)
{
    int w = BitUtils.DivRoundUp(width, blockWidth);
    int h = BitUtils.DivRoundUp(height, blockHeight);

    int inStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);

    if (inStride == stride)
    {
        // Both layouts are identical, no per-row copy is needed.
        if (output.Length != 0)
        {
            data.CopyTo(output);
            return output;
        }
        else
        {
            return data;
        }
    }

    // A row holds "w" blocks of "bytesPerPixel" bytes each, so the payload must be derived
    // from the block count and not from the texel width (they only match when blockWidth is 1).
    // It is also clamped to the destination stride, like ConvertLinearStridedToLinear clamps
    // to its own output stride, so that the last row can never be sliced past the buffer end.
    int lineSize = Math.Min(w * bytesPerPixel, stride);

    if (output.Length == 0)
    {
        output = new byte[h * stride];
    }

    int inOffs = 0;
    int outOffs = 0;

    for (int y = 0; y < h; y++)
    {
        data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize));

        inOffs += inStride;
        outOffs += stride;
    }

    return output;
}

/// <summary>
/// Computes the size in bytes of a texture stored linearly, with every row aligned to
/// <c>HostStrideAlignment</c>, summed over all mip levels and multiplied by the layer count.
/// </summary>
/// <param name="width">Width of the base level</param>
/// <param name="height">Height of the base level</param>
/// <param name="depth">Depth of the base level</param>
/// <param name="levels">Number of mip levels</param>
/// <param name="layers">Number of layers</param>
/// <param name="blockWidth">Width of one block</param>
/// <param name="blockHeight">Height of one block</param>
/// <param name="bytesPerPixel">Size in bytes of one block</param>
/// <returns>Total size in bytes</returns>
private static int GetTextureSize(
    int width,
    int height,
    int depth,
    int levels,
    int layers,
    int blockWidth,
    int blockHeight,
    int bytesPerPixel)
{
    int layerSize = 0;

    for (int level = 0; level < levels; level++)
    {
        // Every dimension halves per level, but never drops below 1.
        int w = Math.Max(1, width >> level);
        int h = Math.Max(1, height >> level);
        int d = Math.Max(1, depth >> level);

        w = BitUtils.DivRoundUp(w, blockWidth);
        h = BitUtils.DivRoundUp(h, blockHeight);

        int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);

        layerSize += stride * h * d;
    }

    return layerSize * layers;
}
} // End of the enclosing class.
} // End of the enclosing namespace.
// \ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/OffsetCalculator.cs b/Ryujinx.Graphics.Texture/OffsetCalculator.cs deleted file mode 100644 index d7472e2f..00000000 --- a/Ryujinx.Graphics.Texture/OffsetCalculator.cs +++ /dev/null @@ -1,141 +0,0 @@
using Ryujinx.Common;
using System;
using System.Runtime.CompilerServices;
using static Ryujinx.Graphics.Texture.BlockLinearConstants;

namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Translates texel coordinates into byte offsets for a texture that is stored either
    /// linearly (rows separated by a stride) or through a <see cref="BlockLinearLayout"/>.
    /// </summary>
    public class OffsetCalculator
    {
        private int _width;
        private int _height;
        private int _stride;
        private bool _isLinear;
        private int _bytesPerPixel;

        // Only assigned for non-linear textures.
        private BlockLinearLayout _layoutConverter;

        // Row contribution cached by SetY for the linear single-argument lookups.
        private int _yPart;

        /// <summary>
        /// Creates a new offset calculator.
        /// </summary>
        /// <param name="width">Texture width</param>
        /// <param name="height">Texture height</param>
        /// <param name="stride">Distance in bytes between rows, used by the linear layout</param>
        /// <param name="isLinear">True for the linear layout, false for block linear</param>
        /// <param name="gobBlocksInY">Forwarded to <see cref="BlockLinearLayout"/></param>
        /// <param name="gobBlocksInZ">Forwarded to <see cref="BlockLinearLayout"/></param>
        /// <param name="bytesPerPixel">Size in bytes of one texel</param>
        public OffsetCalculator(
            int width,
            int height,
            int stride,
            bool isLinear,
            int gobBlocksInY,
            int gobBlocksInZ,
            int bytesPerPixel)
        {
            _width = width;
            _height = height;
            _stride = stride;
            _isLinear = isLinear;
            _bytesPerPixel = bytesPerPixel;

            // The aligned width is computed for both layouts, although only block linear consumes it.
            int alignedWidth = BitUtils.AlignUp(width, GobStride / bytesPerPixel);

            if (isLinear)
            {
                return;
            }

            _layoutConverter = new BlockLinearLayout(
                alignedWidth,
                height,
                gobBlocksInY,
                gobBlocksInZ,
                bytesPerPixel);
        }

        /// <summary>
        /// Creates a new offset calculator, passing 1 as the GOB block count in Z.
        /// </summary>
        public OffsetCalculator(
            int width,
            int height,
            int stride,
            bool isLinear,
            int gobBlocksInY,
            int bytesPerPixel) : this(width, height, stride, isLinear, gobBlocksInY, 1, bytesPerPixel)
        {
        }

        /// <summary>
        /// Selects the row used by the single-argument offset lookups.
        /// </summary>
        /// <param name="y">Row index</param>
        public void SetY(int y)
        {
            if (!_isLinear)
            {
                _layoutConverter.SetY(y);
                return;
            }

            _yPart = y * _stride;
        }

        /// <summary>
        /// Gets the byte offset of the texel at the given coordinates.
        /// </summary>
        public int GetOffset(int x, int y)
        {
            return _isLinear
                ? x * _bytesPerPixel + y * _stride
                : _layoutConverter.GetOffset(x, y, 0);
        }

        /// <summary>
        /// Gets the byte offset of the texel at column <paramref name="x"/> of the row last given to <see cref="SetY"/>.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetOffset(int x)
        {
            return _isLinear
                ? x * _bytesPerPixel + _yPart
                : _layoutConverter.GetOffset(x);
        }

        /// <summary>
        /// Line-offset variant of <see cref="GetOffset(int)"/>.
        /// </summary>
        /// <remarks>
        /// In the linear case <paramref name="x"/> is added unscaled, so it is presumably already
        /// a byte offset within the row - TODO confirm against the callers.
        /// </remarks>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetOffsetWithLineOffset64(int x)
        {
            return _isLinear
                ? x + _yPart
                : _layoutConverter.GetOffsetWithLineOffset64(x);
        }

        /// <summary>
        /// Gets the byte range that covers a rectangle of the texture.
        /// </summary>
        /// <returns>Start offset and size in bytes of the range</returns>
        public (int offset, int size) GetRectangleRange(int x, int y, int width, int height)
        {
            if (!_isLinear)
            {
                return _layoutConverter.GetRectangleRange(x, y, width, height);
            }

            // The size is measured with the absolute stride, while the returned offset keeps its sign.
            int absStride = Math.Abs(_stride);

            int first = y * absStride + x * _bytesPerPixel;
            int last = (y + height - 1) * absStride + (x + width) * _bytesPerPixel;

            return (y * _stride + x * _bytesPerPixel, last - first);
        }

        /// <summary>
        /// Checks whether another calculator describes the same layout as this one.
        /// </summary>
        public bool LayoutMatches(OffsetCalculator other)
        {
            if (!_isLinear)
            {
                return !other._isLinear && _layoutConverter.LayoutMatches(other._layoutConverter);
            }

            if (!other._isLinear)
            {
                return false;
            }

            return _width == other._width &&
                   _height == other._height &&
                   _stride == other._stride &&
                   _bytesPerPixel == other._bytesPerPixel;
        }
    }
}
// \ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/PixelConverter.cs b/Ryujinx.Graphics.Texture/PixelConverter.cs deleted file mode 100644 index add25cd3..00000000 --- a/Ryujinx.Graphics.Texture/PixelConverter.cs +++ /dev/null @@ -1,216 +0,0 @@
using Ryujinx.Common;
using System;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Widens packed pixel formats into formats with twice the bytes per pixel.
    /// Rows of both the input and the output are aligned to <c>LayoutConverter.HostStrideAlignment</c>.
    /// </summary>
    public static class PixelConverter
    {
        /// <summary>
        /// Computes the per-row padding, in pixels, of the input and output, plus the number of input rows.
        /// </summary>
        /// <param name="length">Input size in bytes</param>
        /// <param name="width">Row width in pixels</param>
        /// <param name="bpp">Input bytes per pixel</param>
        /// <param name="outBpp">Output bytes per pixel</param>
        private static (int remainder, int outRemainder, int height) GetLineRemainders(int length, int width, int bpp, int outBpp)
        {
            int inStride = BitUtils.AlignUp(width * bpp, LayoutConverter.HostStrideAlignment);
            int outStride = BitUtils.AlignUp(width * outBpp, LayoutConverter.HostStrideAlignment);

            return (inStride / bpp - width, outStride / outBpp - width, length / inStride);
        }

        /// <summary>
        /// Zero-extends every input byte into a 16-bit output value.
        /// </summary>
        /// <param name="data">Input pixels, 1 byte each</param>
        /// <param name="width">Row width in pixels</param>
        /// <returns>Output pixels, 2 bytes each</returns>
        public static unsafe byte[] ConvertR4G4ToR4G4B4A4(ReadOnlySpan<byte> data, int width)
        {
            byte[] output = new byte[data.Length * 2];

            (int remainder, int outRemainder, int height) = GetLineRemainders(data.Length, width, 1, 2);

            Span<ushort> widened = MemoryMarshal.Cast<byte, ushort>(output);

            if (remainder != 0)
            {
                // Rows carry padding, so convert row by row and skip the padding on both sides.
                int src = 0;
                int dst = 0;

                for (int row = 0; row < height; row++)
                {
                    for (int col = 0; col < width; col++)
                    {
                        widened[dst++] = data[src++];
                    }

                    src += remainder;
                    dst += outRemainder;
                }

                return output;
            }

            // No padding: the whole buffer is converted as one run.
            int scalarStart = 0;

            if (Sse41.IsSupported)
            {
                // 8 input bytes are widened per iteration, the scalar loop below handles the tail.
                int vectorLength = data.Length & ~7;
                scalarStart = vectorLength;

                fixed (byte* inputPtr = data, outputPtr = output)
                {
                    for (ulong offset = 0; offset < (ulong)vectorLength; offset += 8)
                    {
                        Sse2.Store(outputPtr + offset * 2, Sse41.ConvertToVector128Int16(inputPtr + offset).AsByte());
                    }
                }
            }

            for (int i = scalarStart; i < data.Length; i++)
            {
                widened[i] = (ushort)data[i];
            }

            return output;
        }

        /// <summary>
        /// Expands 16-bit pixels with 5, 6 and 5 bit components into 32-bit pixels with a fully set top byte.
        /// </summary>
        /// <param name="data">Input pixels, 2 bytes each</param>
        /// <param name="width">Row width in pixels</param>
        /// <returns>Output pixels, 4 bytes each</returns>
        public static unsafe byte[] ConvertR5G6B5ToR8G8B8A8(ReadOnlySpan<byte> data, int width)
        {
            byte[] output = new byte[data.Length * 2];

            (int remainder, int outRemainder, int height) = GetLineRemainders(data.Length, width, 2, 4);

            ReadOnlySpan<ushort> source = MemoryMarshal.Cast<byte, ushort>(data);
            Span<uint> target = MemoryMarshal.Cast<byte, uint>(output);

            int src = 0;
            int dst = 0;

            for (int row = 0; row < height; row++)
            {
                for (int col = 0; col < width; col++)
                {
                    uint packed = source[src++];

                    // Top byte set, low 5 bits into byte 0 and high 5 bits into byte 2.
                    uint rgba = 0xff000000 | ((packed << 3) & 0x000000f8) | ((packed << 8) & 0x00f80000);

                    // Fill the low 3 bits of both 5 bit components with their top bits.
                    rgba |= (rgba >> 5) & 0x00070007;

                    // Place the 6 bit component into byte 1 and fill its low 2 bits the same way.
                    rgba |= ((packed << 5) & 0x0000fc00) | ((packed >> 1) & 0x00000300);

                    target[dst++] = rgba;
                }

                src += remainder;
                dst += outRemainder;
            }

            return output;
        }

        /// <summary>
        /// Expands 16-bit pixels with three 5 bit components (lowest bits first) and a 1 bit top flag into 32-bit pixels.
        /// </summary>
        /// <param name="data">Input pixels, 2 bytes each</param>
        /// <param name="width">Row width in pixels</param>
        /// <param name="forceAlpha">When true the top output byte is always fully set, otherwise it follows bit 15 of the input</param>
        /// <returns>Output pixels, 4 bytes each</returns>
        public static unsafe byte[] ConvertR5G5B5ToR8G8B8A8(ReadOnlySpan<byte> data, int width, bool forceAlpha)
        {
            byte[] output = new byte[data.Length * 2];

            (int remainder, int outRemainder, int height) = GetLineRemainders(data.Length, width, 2, 4);

            ReadOnlySpan<ushort> source = MemoryMarshal.Cast<byte, ushort>(data);
            Span<uint> target = MemoryMarshal.Cast<byte, uint>(output);

            int src = 0;
            int dst = 0;

            for (int row = 0; row < height; row++)
            {
                for (int col = 0; col < width; col++)
                {
                    uint packed = source[src++];

                    uint a = forceAlpha ? 1 : (packed >> 15);

                    uint rgba = (a * 0xff000000) |
                                ((packed << 3) & 0x000000f8) |
                                ((packed << 6) & 0x0000f800) |
                                ((packed << 9) & 0x00f80000);

                    // Fill the low 3 bits of every 5 bit component with its top bits.
                    rgba |= (rgba >> 5) & 0x00070707;

                    target[dst++] = rgba;
                }

                src += remainder;
                dst += outRemainder;
            }

            return output;
        }

        /// <summary>
        /// Expands 16-bit pixels with a 1 bit top flag and three 5 bit components into 32-bit pixels,
        /// with the component order reversed relative to <see cref="ConvertR5G5B5ToR8G8B8A8"/>.
        /// </summary>
        /// <param name="data">Input pixels, 2 bytes each</param>
        /// <param name="width">Row width in pixels</param>
        /// <returns>Output pixels, 4 bytes each</returns>
        public static unsafe byte[] ConvertA1B5G5R5ToR8G8B8A8(ReadOnlySpan<byte> data, int width)
        {
            byte[] output = new byte[data.Length * 2];

            (int remainder, int outRemainder, int height) = GetLineRemainders(data.Length, width, 2, 4);

            ReadOnlySpan<ushort> source = MemoryMarshal.Cast<byte, ushort>(data);
            Span<uint> target = MemoryMarshal.Cast<byte, uint>(output);

            int src = 0;
            int dst = 0;

            for (int row = 0; row < height; row++)
            {
                for (int col = 0; col < width; col++)
                {
                    uint packed = source[src++];

                    uint a = packed >> 15;

                    uint rgba = (a * 0xff000000) |
                                ((packed >> 8) & 0x000000f8) |
                                ((packed << 5) & 0x0000f800) |
                                ((packed << 18) & 0x00f80000);

                    // Fill the low 3 bits of every 5 bit component with its top bits.
                    rgba |= (rgba >> 5) & 0x00070707;

                    target[dst++] = rgba;
                }

                src += remainder;
                dst += outRemainder;
            }

            return output;
        }

        /// <summary>
        /// Expands 16-bit pixels with four 4 bit components into 32-bit pixels with four 8 bit components.
        /// </summary>
        /// <param name="data">Input pixels, 2 bytes each</param>
        /// <param name="width">Row width in pixels</param>
        /// <returns>Output pixels, 4 bytes each</returns>
        public static unsafe byte[] ConvertR4G4B4A4ToR8G8B8A8(ReadOnlySpan<byte> data, int width)
        {
            byte[] output = new byte[data.Length * 2];

            (int remainder, int outRemainder, int height) = GetLineRemainders(data.Length, width, 2, 4);

            ReadOnlySpan<ushort> source = MemoryMarshal.Cast<byte, ushort>(data);
            Span<uint> target = MemoryMarshal.Cast<byte, uint>(output);

            int src = 0;
            int dst = 0;

            for (int row = 0; row < height; row++)
            {
                for (int col = 0; col < width; col++)
                {
                    uint packed = source[src++];

                    // Spread every nibble into the low half of its own output byte.
                    uint nibbles = (packed & 0x0000000f) |
                                   ((packed << 4) & 0x00000f00) |
                                   ((packed << 8) & 0x000f0000) |
                                   ((packed << 12) & 0x0f000000);

                    // Multiplying by 0x11 copies every nibble into the high half of its byte.
                    target[dst++] = nibbles * 0x11;
                }

                src += remainder;
                dst += outRemainder;
            }

            return output;
        }
- } -} diff --git a/Ryujinx.Graphics.Texture/Region.cs b/Ryujinx.Graphics.Texture/Region.cs deleted file mode 100644 index e59888a0..00000000 --- a/Ryujinx.Graphics.Texture/Region.cs +++ /dev/null @@ -1,14 +0,0 @@ -namespace Ryujinx.Graphics.Texture -{ - public readonly struct Region - { - public int Offset { get; } - public int Size { get; } - - public Region(int offset, int size) - { - Offset = offset; - Size = size; - } - } -} diff --git a/Ryujinx.Graphics.Texture/Ryujinx.Graphics.Texture.csproj b/Ryujinx.Graphics.Texture/Ryujinx.Graphics.Texture.csproj deleted file mode 100644 index 70e3453c..00000000 --- a/Ryujinx.Graphics.Texture/Ryujinx.Graphics.Texture.csproj +++ /dev/null @@ -1,11 +0,0 @@ -<Project Sdk="Microsoft.NET.Sdk"> - <PropertyGroup> - <TargetFramework>net7.0</TargetFramework> - <AllowUnsafeBlocks>true</AllowUnsafeBlocks> - </PropertyGroup> - - <ItemGroup> - <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" /> - </ItemGroup> - -</Project> diff --git a/Ryujinx.Graphics.Texture/Size.cs b/Ryujinx.Graphics.Texture/Size.cs deleted file mode 100644 index 21c45b38..00000000 --- a/Ryujinx.Graphics.Texture/Size.cs +++ /dev/null @@ -1,16 +0,0 @@ -namespace Ryujinx.Graphics.Texture -{ - public readonly struct Size - { - public int Width { get; } - public int Height { get; } - public int Depth { get; } - - public Size(int width, int height, int depth) - { - Width = width; - Height = height; - Depth = depth; - } - } -}
// \ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/SizeCalculator.cs b/Ryujinx.Graphics.Texture/SizeCalculator.cs deleted file mode 100644 index 5568784f..00000000 --- a/Ryujinx.Graphics.Texture/SizeCalculator.cs +++ /dev/null @@ -1,287 +0,0 @@
using Ryujinx.Common;
using System;

using static Ryujinx.Graphics.Texture.BlockLinearConstants;

namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Computes sizes, offsets and aligned dimensions of linear and block linear textures.
    /// </summary>
    public static class SizeCalculator
    {
        private const int StrideAlignment = 32;

        /// <summary>
        /// Sums the depth of every mip level, where the depth halves per level but never drops below 1.
        /// </summary>
        private static int Calculate3DOffsetCount(int levels, int depth)
        {
            int total = depth;

            for (int remaining = levels - 1; remaining > 0; remaining--)
            {
                depth = Math.Max(1, depth >> 1);
                total += depth;
            }

            return total;
        }

        /// <summary>
        /// Computes the size information of a block linear texture.
        /// </summary>
        /// <param name="width">Width of the base level</param>
        /// <param name="height">Height of the base level</param>
        /// <param name="depth">Depth of the base level, values above 1 select the 3D path</param>
        /// <param name="levels">Number of mip levels</param>
        /// <param name="layers">Number of layers</param>
        /// <param name="blockWidth">Width of one block</param>
        /// <param name="blockHeight">Height of one block</param>
        /// <param name="bytesPerPixel">Size in bytes of one block</param>
        /// <param name="gobBlocksInY">GOB blocks in Y of the base level</param>
        /// <param name="gobBlocksInZ">GOB blocks in Z of the base level</param>
        /// <param name="gobBlocksInTileX">GOB blocks in the tile X direction</param>
        /// <param name="gpuLayerSize">Layer size that replaces the computed one when it is larger</param>
        /// <returns>Offsets and sizes of the texture</returns>
        public static SizeInfo GetBlockLinearTextureSize(
            int width,
            int height,
            int depth,
            int levels,
            int layers,
            int blockWidth,
            int blockHeight,
            int bytesPerPixel,
            int gobBlocksInY,
            int gobBlocksInZ,
            int gobBlocksInTileX,
            int gpuLayerSize = 0)
        {
            bool is3D = depth > 1;

            int offsetCount = is3D ? Calculate3DOffsetCount(levels, depth) : levels * layers * depth;

            int[] allOffsets = new int[offsetCount];
            int[] mipOffsets = new int[levels];
            int[] sliceSizes = new int[levels];
            int[] levelSizes = new int[levels];

            // These carry over from one level to the next, they only ever shrink.
            int mipGobBlocksInY = gobBlocksInY;
            int mipGobBlocksInZ = gobBlocksInZ;

            int gobWidth = (GobStride / bytesPerPixel) * gobBlocksInTileX;
            int gobHeight = gobBlocksInY * GobHeight;

            int layerSize = 0;
            int depthLevelOffset = 0;

            for (int level = 0; level < levels; level++)
            {
                int w = BitUtils.DivRoundUp(Math.Max(1, width >> level), blockWidth);
                int h = BitUtils.DivRoundUp(Math.Max(1, height >> level), blockHeight);
                int d = Math.Max(1, depth >> level);

                while (mipGobBlocksInY != 1 && h <= (mipGobBlocksInY >> 1) * GobHeight)
                {
                    mipGobBlocksInY >>= 1;
                }

                while (mipGobBlocksInZ != 1 && d <= (mipGobBlocksInZ >> 1))
                {
                    mipGobBlocksInZ >>= 1;
                }

                bool isSmallLevel = d < gobBlocksInZ || w <= gobWidth || h <= gobHeight;

                int widthInGobs = BitUtils.AlignUp(
                    BitUtils.DivRoundUp(w * bytesPerPixel, GobStride),
                    isSmallLevel ? 1 : gobBlocksInTileX);

                int totalBlocksOfGobsInZ = BitUtils.DivRoundUp(d, mipGobBlocksInZ);
                int totalBlocksOfGobsInY = BitUtils.DivRoundUp(BitUtils.DivRoundUp(h, GobHeight), mipGobBlocksInY);

                int robSize = widthInGobs * mipGobBlocksInY * mipGobBlocksInZ * GobSize;

                if (is3D)
                {
                    int gobSize = mipGobBlocksInY * GobSize;
                    int sliceSize = totalBlocksOfGobsInY * widthInGobs * gobSize;

                    // NOTE(review): the mask comes from the base level gobBlocksInZ, not from
                    // mipGobBlocksInZ - confirm this is intended for the smaller mip levels.
                    int mask = gobBlocksInZ - 1;

                    for (int z = 0; z < d; z++)
                    {
                        allOffsets[depthLevelOffset + z] = layerSize + (z & mask) * gobSize + (z & ~mask) * sliceSize;
                    }
                }

                int levelSliceSize = totalBlocksOfGobsInY * robSize;
                int levelSize = totalBlocksOfGobsInZ * levelSliceSize;

                mipOffsets[level] = layerSize;
                sliceSizes[level] = levelSliceSize;
                levelSizes[level] = levelSize;

                layerSize += levelSize;
                depthLevelOffset += d;
            }

            if (layers > 1)
            {
                layerSize = AlignLayerSize(
                    layerSize,
                    height,
                    depth,
                    blockHeight,
                    gobBlocksInY,
                    gobBlocksInZ,
                    gobBlocksInTileX);
            }

            int totalSize;

            if (layerSize < gpuLayerSize)
            {
                // The last layer keeps its computed size, all the others use the supplied layer size.
                totalSize = (layers - 1) * gpuLayerSize + layerSize;
                layerSize = gpuLayerSize;
            }
            else
            {
                totalSize = layerSize * layers;
            }

            if (!is3D)
            {
                int baseIndex = 0;
                int baseOffset = 0;

                for (int layer = 0; layer < layers; layer++)
                {
                    for (int level = 0; level < levels; level++)
                    {
                        allOffsets[baseIndex + level] = baseOffset + mipOffsets[level];
                    }

                    baseIndex += levels;
                    baseOffset += layerSize;
                }
            }

            return new SizeInfo(mipOffsets, allOffsets, sliceSizes, levelSizes, depth, levels, layerSize, totalSize, is3D);
        }

        /// <summary>
        /// Computes the size information of a linear texture.
        /// </summary>
        /// <param name="stride">Distance in bytes between rows</param>
        /// <param name="height">Texture height</param>
        /// <param name="blockHeight">Height of one block</param>
        public static SizeInfo GetLinearTextureSize(int stride, int height, int blockHeight)
        {
            // Non-2D or mipmapped linear textures are not supported by the Switch GPU,
            // so we only need to handle a single case (2D textures without mipmaps).
            int rows = BitUtils.DivRoundUp(height, blockHeight);

            return new SizeInfo(stride * rows);
        }

        /// <summary>
        /// Rounds a layer size up to the alignment implied by the GOB block configuration.
        /// </summary>
        private static int AlignLayerSize(
            int size,
            int height,
            int depth,
            int blockHeight,
            int gobBlocksInY,
            int gobBlocksInZ,
            int gobBlocksInTileX)
        {
            if (gobBlocksInTileX >= 2)
            {
                int alignment = (gobBlocksInTileX * GobSize) * gobBlocksInY * gobBlocksInZ;

                return BitUtils.AlignUp(size, alignment);
            }

            // Same reduction as GetMipGobBlockSizes: divide the height by the block height,
            // then halve the block counts while the texture still fits in half of them.
            (gobBlocksInY, gobBlocksInZ) = GetMipGobBlockSizes(height, depth, blockHeight, gobBlocksInY, gobBlocksInZ);

            int blockOfGobsSize = gobBlocksInY * gobBlocksInZ * GobSize;

            if (size % blockOfGobsSize != 0)
            {
                size = (size / blockOfGobsSize + 1) * blockOfGobsSize;
            }

            return size;
        }

        /// <summary>
        /// Computes the dimensions, in blocks, of a block linear texture after alignment.
        /// </summary>
        public static Size GetBlockLinearAlignedSize(
            int width,
            int height,
            int depth,
            int blockWidth,
            int blockHeight,
            int bytesPerPixel,
            int gobBlocksInY,
            int gobBlocksInZ,
            int gobBlocksInTileX)
        {
            width = BitUtils.DivRoundUp(width, blockWidth);
            height = BitUtils.DivRoundUp(height, blockHeight);

            int gobWidth = (GobStride / bytesPerPixel) * gobBlocksInTileX;
            int gobHeight = gobBlocksInY * GobHeight;

            bool isSmall = depth < gobBlocksInZ || width <= gobWidth || height <= gobHeight;

            int alignment = isSmall ? GobStride / bytesPerPixel : gobWidth;

            // Height has already been divided by block height, so pass it as 1.
            (gobBlocksInY, gobBlocksInZ) = GetMipGobBlockSizes(height, depth, 1, gobBlocksInY, gobBlocksInZ);

            return new Size(
                BitUtils.AlignUp(width, alignment),
                BitUtils.AlignUp(height, gobBlocksInY * GobHeight),
                BitUtils.AlignUp(depth, gobBlocksInZ));
        }

        /// <summary>
        /// Computes the dimensions, in blocks, of a linear texture after aligning its width.
        /// </summary>
        public static Size GetLinearAlignedSize(
            int width,
            int height,
            int blockWidth,
            int blockHeight,
            int bytesPerPixel)
        {
            int widthInBlocks = BitUtils.DivRoundUp(width, blockWidth);
            int heightInBlocks = BitUtils.DivRoundUp(height, blockHeight);

            int alignedWidth = BitUtils.AlignUp(widthInBlocks, StrideAlignment / bytesPerPixel);

            return new Size(alignedWidth, heightInBlocks, 1);
        }

        /// <summary>
        /// Halves the GOB block counts while the texture still fits in half of them.
        /// </summary>
        /// <returns>The reduced GOB block counts in Y and Z</returns>
        public static (int, int) GetMipGobBlockSizes(
            int height,
            int depth,
            int blockHeight,
            int gobBlocksInY,
            int gobBlocksInZ)
        {
            int heightInBlocks = BitUtils.DivRoundUp(height, blockHeight);

            while (gobBlocksInY != 1 && heightInBlocks <= (gobBlocksInY >> 1) * GobHeight)
            {
                gobBlocksInY >>= 1;
            }

            while (gobBlocksInZ != 1 && depth <= (gobBlocksInZ >> 1))
            {
                gobBlocksInZ >>= 1;
            }

            return (gobBlocksInY, gobBlocksInZ);
        }
    }
}
// \ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/SizeInfo.cs b/Ryujinx.Graphics.Texture/SizeInfo.cs deleted file mode 100644 index eb573728..00000000 --- a/Ryujinx.Graphics.Texture/SizeInfo.cs +++ /dev/null @@ -1,119 +0,0 @@
using System;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Offsets and sizes of the mip levels, layers and slices of a texture.
    /// </summary>
    public readonly struct SizeInfo
    {
        private readonly int[] _mipOffsets;

        private readonly int _levels;
        private readonly int _depth;
        private readonly bool _is3D;

        public readonly int[] AllOffsets;
        public readonly int[] SliceSizes;
        public readonly int[] LevelSizes;
        public int LayerSize { get; }
        public int TotalSize { get; }

        /// <summary>
        /// Creates the size information of a texture made of a single region starting at offset 0.
        /// </summary>
        /// <param name="size">Size in bytes of the region</param>
        public SizeInfo(int size)
        {
            _mipOffsets = new[] { 0 };
            AllOffsets = new[] { 0 };
            SliceSizes = new[] { size };
            LevelSizes = new[] { size };
            _depth = 1;
            _levels = 1;
            LayerSize = size;
            TotalSize = size;
            _is3D = false;
        }

        /// <summary>
        /// Creates the size information from precomputed tables.
        /// </summary>
        internal SizeInfo(
            int[] mipOffsets,
            int[] allOffsets,
            int[] sliceSizes,
            int[] levelSizes,
            int depth,
            int levels,
            int layerSize,
            int totalSize,
            bool is3D)
        {
            _mipOffsets = mipOffsets;
            AllOffsets = allOffsets;
            SliceSizes = sliceSizes;
            LevelSizes = levelSizes;
            _depth = depth;
            _levels = levels;
            LayerSize = layerSize;
            TotalSize = totalSize;
            _is3D = is3D;
        }

        /// <summary>
        /// Gets the offset of a mip level.
        /// </summary>
        /// <param name="level">Index of the mip level</param>
        /// <exception cref="ArgumentOutOfRangeException">The level is negative or not below the number of mip offsets</exception>
        public int GetMipOffset(int level)
        {
            if (level < 0 || level >= _mipOffsets.Length)
            {
                throw new ArgumentOutOfRangeException(nameof(level));
            }

            return _mipOffsets[level];
        }

        /// <summary>
        /// Looks up which layer and level start exactly at the given offset.
        /// </summary>
        /// <param name="offset">Offset to search for in <see cref="AllOffsets"/></param>
        /// <param name="firstLayer">Layer found (slice within the level for 3D textures), or 0 when not found</param>
        /// <param name="firstLevel">Level found, or 0 when not found</param>
        /// <returns>True if an entry with that exact offset exists, false otherwise</returns>
        public bool FindView(int offset, out int firstLayer, out int firstLevel)
        {
            firstLayer = 0;
            firstLevel = 0;

            int index = Array.BinarySearch(AllOffsets, offset);

            if (index < 0)
            {
                return false;
            }

            if (!_is3D)
            {
                // Entries are grouped per layer, with one entry per level inside each group.
                firstLayer = index / _levels;
                firstLevel = index - (firstLayer * _levels);

                return true;
            }

            // For 3D textures the entries are the slices of level 0, then of level 1, and so on,
            // with the slice count halving per level (never below 1).
            int slice = index;
            int level = 0;

            for (int levelDepth = _depth; slice >= levelDepth; levelDepth = Math.Max(levelDepth >> 1, 1))
            {
                slice -= levelDepth;
                level++;
            }

            firstLayer = slice;
            firstLevel = level;

            return true;
        }

        /// <summary>
        /// Enumerates the regions of the texture: one per mip level for 3D textures,
        /// otherwise one per entry of <see cref="AllOffsets"/>.
        /// </summary>
        public IEnumerable<Region> AllRegions()
        {
            if (!_is3D)
            {
                for (int i = 0; i < AllOffsets.Length; i++)
                {
                    yield return new Region(AllOffsets[i], SliceSizes[i % _levels]);
                }

                yield break;
            }

            for (int level = 0; level < _mipOffsets.Length; level++)
            {
                int start = _mipOffsets[level];

                // A level is never reported as extending past the total size.
                int size = Math.Min(TotalSize - start, LevelSizes[level]);

                yield return new Region(start, size);
            }
        }
    }
}
\ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/Utils/BC67Tables.cs b/Ryujinx.Graphics.Texture/Utils/BC67Tables.cs deleted file mode 100644 index d890652c..00000000 --- a/Ryujinx.Graphics.Texture/Utils/BC67Tables.cs +++ /dev/null @@ -1,297 +0,0 @@ -namespace Ryujinx.Graphics.Texture.Utils -{ - static class BC67Tables - { - public static readonly BC7ModeInfo[] BC7ModeInfos = new BC7ModeInfo[] - { - new BC7ModeInfo(3, 4, 6, 0, 0, 3, 0, 4, 0), - new BC7ModeInfo(2, 6, 2, 0, 0, 3, 0, 6, 0), - new BC7ModeInfo(3, 6, 0, 0, 0, 2, 0, 5, 0), - new BC7ModeInfo(2, 6, 4, 0, 0, 2, 0, 7, 0), - new BC7ModeInfo(1, 0, 0, 2, 1, 2, 3, 5, 6), - new BC7ModeInfo(1, 0, 0, 2, 0, 2, 2, 7, 8), - new BC7ModeInfo(1, 0, 2, 0, 0, 4, 0, 7, 7), - new BC7ModeInfo(2, 6, 4, 0, 0, 2, 0, 5, 5) - }; - - public static readonly byte[][] Weights = - { - new byte[] { 0, 21, 43, 64 }, - new byte[] { 0, 9, 18, 27, 37, 46, 55, 64 }, - new byte[] { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 } - }; - - public static readonly byte[][] InverseWeights = - { - new byte[] { 64, 43, 21, 0 }, - new byte[] { 64, 55, 46, 37, 27, 18, 9, 0 }, - new byte[] { 64, 60, 55, 51, 47, 43, 38, 34, 30, 26, 21, 17, 13, 9, 4, 0 } - }; - - public static readonly byte[][][] FixUpIndices = new byte[3][][] - { - new byte[64][] - { - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - 
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, - new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 } - }, - new byte[64][] - { - new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, - new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, - new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, - new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, - new byte[] { 0, 15, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 2, 0 }, - new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 15, 0 }, - new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 }, - new byte[] { 0, 8, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 }, - new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 6, 0 }, new byte[] { 0, 8, 0 }, - new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, - new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 
0 }, - new byte[] { 0, 2, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 6, 0 }, - new byte[] { 0, 6, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 6, 0 }, new byte[] { 0, 8, 0 }, - new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 }, - new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, - new byte[] { 0, 15, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 15, 0 } - }, - new byte[64][] - { - new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 }, new byte[] { 0, 15, 8 }, new byte[] { 0, 15, 3 }, - new byte[] { 0, 8, 15 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 15, 3 }, new byte[] { 0, 15, 8 }, - new byte[] { 0, 8, 15 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 6, 15 }, new byte[] { 0, 6, 15 }, - new byte[] { 0, 6, 15 }, new byte[] { 0, 5, 15 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 }, - new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 15, 3 }, - new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 }, new byte[] { 0, 6, 15 }, new byte[] { 0, 10, 8 }, - new byte[] { 0, 5, 3 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 8, 6 }, new byte[] { 0, 6, 10 }, - new byte[] { 0, 8, 15 }, new byte[] { 0, 5, 15 }, new byte[] { 0, 15, 10 }, new byte[] { 0, 15, 8 }, - new byte[] { 0, 8, 15 }, new byte[] { 0, 15, 3 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 5, 10 }, - new byte[] { 0, 6, 10 }, new byte[] { 0, 10, 8 }, new byte[] { 0, 8, 9 }, new byte[] { 0, 15, 10 }, - new byte[] { 0, 15, 6 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 15, 8 }, new byte[] { 0, 5, 15 }, - new byte[] { 0, 15, 3 }, new byte[] { 0, 15, 6 }, new byte[] { 0, 15, 6 }, new byte[] { 0, 15, 8 }, - new byte[] { 0, 3, 15 }, new byte[] { 0, 15, 3 }, new byte[] { 0, 5, 15 }, new byte[] { 0, 5, 15 }, - new byte[] { 0, 5, 15 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 5, 15 }, new byte[] { 0, 10, 15 }, - new byte[] { 0, 5, 15 }, new byte[] { 0, 
10, 15 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 13, 15 }, - new byte[] { 0, 15, 3 }, new byte[] { 0, 12, 15 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 } - } - }; - - public static readonly byte[][][] PartitionTable = new byte[3][][] - { - new byte[64][] - { - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 0 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 1 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 2 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 3 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 4 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 5 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 6 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 7 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 8 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 9 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 10 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 11 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 12 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 13 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 14 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 15 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 16 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 17 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 18 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 19 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 20 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 21 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 22 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 
}, // 23 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 24 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 25 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 26 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 27 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 28 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 29 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 30 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 31 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 32 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 33 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 34 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 35 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 36 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 37 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 38 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 39 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 40 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 41 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 42 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 43 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 44 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 45 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 46 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 47 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 48 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 49 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 50 - new byte[16] { 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 51 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 52 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 53 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 54 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 55 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 56 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 57 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 58 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 59 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 60 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 61 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 62 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } // 63 - }, - new byte[64][] - { - new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // 0 - new byte[16] { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 }, // 1 - new byte[16] { 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 }, // 2 - new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // 3 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 }, // 4 - new byte[16] { 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // 5 - new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // 6 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // 7 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 }, // 8 - new byte[16] { 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // 9 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // 10 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 }, // 11 - new byte[16] { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // 12 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, // 13 - new byte[16] { 0, 0, 0, 0, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // 14 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 }, // 15 - new byte[16] { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, // 16 - new byte[16] { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // 17 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 }, // 18 - new byte[16] { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // 19 - new byte[16] { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // 20 - new byte[16] { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, // 21 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // 22 - new byte[16] { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 }, // 23 - new byte[16] { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // 24 - new byte[16] { 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // 25 - new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 }, // 26 - new byte[16] { 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 }, // 27 - new byte[16] { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 }, // 28 - new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // 29 - new byte[16] { 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 }, // 30 - new byte[16] { 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // 31 - new byte[16] { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, // 32 - new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 }, // 33 - new byte[16] { 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 }, // 34 - new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 }, // 35 - new byte[16] { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 }, // 36 - new byte[16] { 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 }, // 37 - new byte[16] { 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, // 38 - new byte[16] { 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 }, // 39 - new byte[16] { 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 }, // 40 - new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 
0, 0 }, // 41 - new byte[16] { 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 }, // 42 - new byte[16] { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 }, // 43 - new byte[16] { 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 }, // 44 - new byte[16] { 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 }, // 45 - new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 }, // 46 - new byte[16] { 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, // 47 - new byte[16] { 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, // 48 - new byte[16] { 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, // 49 - new byte[16] { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 }, // 50 - new byte[16] { 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 }, // 51 - new byte[16] { 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 }, // 52 - new byte[16] { 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // 53 - new byte[16] { 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // 54 - new byte[16] { 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 }, // 55 - new byte[16] { 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // 56 - new byte[16] { 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 }, // 57 - new byte[16] { 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 }, // 58 - new byte[16] { 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 }, // 59 - new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // 60 - new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // 61 - new byte[16] { 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 }, // 62 - new byte[16] { 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 } // 63 - }, - new byte[64][] - { - new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2 }, // 0 - new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1 }, // 1 - new byte[16] { 0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // 2 - new byte[16] { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1 }, // 3 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2 }, 
// 4 - new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2 }, // 5 - new byte[16] { 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 }, // 6 - new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // 7 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 }, // 8 - new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2 }, // 9 - new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // 10 - new byte[16] { 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2 }, // 11 - new byte[16] { 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2 }, // 12 - new byte[16] { 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2 }, // 13 - new byte[16] { 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // 14 - new byte[16] { 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0 }, // 15 - new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2 }, // 16 - new byte[16] { 0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0 }, // 17 - new byte[16] { 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 }, // 18 - new byte[16] { 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1 }, // 19 - new byte[16] { 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2 }, // 20 - new byte[16] { 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1 }, // 21 - new byte[16] { 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2 }, // 22 - new byte[16] { 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0 }, // 23 - new byte[16] { 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0 }, // 24 - new byte[16] { 0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2 }, // 25 - new byte[16] { 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0 }, // 26 - new byte[16] { 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1 }, // 27 - new byte[16] { 0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2 }, // 28 - new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2 }, // 29 - new byte[16] { 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1 }, // 30 - new byte[16] { 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1 }, // 31 - new byte[16] { 0, 0, 0, 0, 
0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // 32 - new byte[16] { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1 }, // 33 - new byte[16] { 0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2 }, // 34 - new byte[16] { 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0 }, // 35 - new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0 }, // 36 - new byte[16] { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }, // 37 - new byte[16] { 0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0 }, // 38 - new byte[16] { 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1 }, // 39 - new byte[16] { 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1 }, // 40 - new byte[16] { 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // 41 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1 }, // 42 - new byte[16] { 0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2 }, // 43 - new byte[16] { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1 }, // 44 - new byte[16] { 0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1 }, // 45 - new byte[16] { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1 }, // 46 - new byte[16] { 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }, // 47 - new byte[16] { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2 }, // 48 - new byte[16] { 0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1 }, // 49 - new byte[16] { 0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2 }, // 50 - new byte[16] { 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2 }, // 51 - new byte[16] { 0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2 }, // 52 - new byte[16] { 0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2 }, // 53 - new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2 }, // 54 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2 }, // 55 - new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2 }, // 56 - new byte[16] { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2 }, // 57 - new byte[16] { 0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2 }, // 58 - new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 
/// <summary>
/// Computes the per-channel minimum and maximum of every pixel in a tile.
/// </summary>
/// <param name="tile">Packed pixels of the tile, one 32-bit value per pixel</param>
/// <param name="w">Tile width; only used to decide whether the 4x4 SSE4.1 path applies</param>
/// <param name="h">Tile height; only used to decide whether the 4x4 SSE4.1 path applies</param>
/// <returns>The (minimum, maximum) color pair; each channel is reduced independently</returns>
public static (RgbaColor8, RgbaColor8) GetMinMaxColors(ReadOnlySpan<uint> tile, int w, int h)
{
    if (Sse41.IsSupported && w == 4 && h == 4)
    {
        GetMinMaxColorsOneSubset4x4Sse41(tile, out RgbaColor8 sseMin, out RgbaColor8 sseMax);

        return (sseMin, sseMax);
    }

    // Scalar path: start from the widest possible range and narrow it pixel by pixel.
    RgbaColor8 lowest = new RgbaColor8(255, 255, 255, 255);
    RgbaColor8 highest = default;

    for (int i = 0; i < tile.Length; i++)
    {
        RgbaColor8 pixel = RgbaColor8.FromUInt32(tile[i]);

        lowest.R = Math.Min(lowest.R, pixel.R);
        lowest.G = Math.Min(lowest.G, pixel.G);
        lowest.B = Math.Min(lowest.B, pixel.B);
        lowest.A = Math.Min(lowest.A, pixel.A);

        highest.R = Math.Max(highest.R, pixel.R);
        highest.G = Math.Max(highest.G, pixel.G);
        highest.B = Math.Max(highest.B, pixel.B);
        highest.A = Math.Max(highest.A, pixel.A);
    }

    return (lowest, highest);
}

/// <summary>
/// Computes the per-channel minimum and maximum colors of each subset of a partitioned tile.
/// </summary>
/// <param name="partitionTable">Subset index of each pixel, addressed as <c>ty * w + tx</c></param>
/// <param name="tile">Packed pixels of the tile in row-major order</param>
/// <param name="w">Tile width in pixels</param>
/// <param name="h">Tile height in pixels</param>
/// <param name="minColors">Receives the minimum color of each subset, indexed by subset</param>
/// <param name="maxColors">Receives the maximum color of each subset, indexed by subset</param>
/// <param name="subsetCount">Number of subsets; 1 and 2 have dedicated SSE4.1 paths for 4x4 tiles</param>
public static void GetMinMaxColors(
    ReadOnlySpan<byte> partitionTable,
    ReadOnlySpan<uint> tile,
    int w,
    int h,
    Span<RgbaColor8> minColors,
    Span<RgbaColor8> maxColors,
    int subsetCount)
{
    if (Sse41.IsSupported && w == 4 && h == 4)
    {
        if (subsetCount == 1)
        {
            GetMinMaxColorsOneSubset4x4Sse41(tile, out minColors[0], out maxColors[0]);
            return;
        }
        else if (subsetCount == 2)
        {
            GetMinMaxColorsTwoSubsets4x4Sse41(partitionTable, tile, minColors, maxColors);
            return;
        }
    }

    // Reset both accumulators. The vectorized paths above overwrite both outputs,
    // so the scalar path must not depend on the caller having zeroed maxColors.
    minColors.Fill(new RgbaColor8(255, 255, 255, 255));
    maxColors.Clear();

    int i = 0;
    for (int ty = 0; ty < h; ty++)
    {
        for (int tx = 0; tx < w; tx++)
        {
            int subset = partitionTable[ty * w + tx];
            RgbaColor8 pixel = RgbaColor8.FromUInt32(tile[i++]);

            ref RgbaColor8 lowest = ref minColors[subset];
            ref RgbaColor8 highest = ref maxColors[subset];

            lowest.R = Math.Min(lowest.R, pixel.R);
            lowest.G = Math.Min(lowest.G, pixel.G);
            lowest.B = Math.Min(lowest.B, pixel.B);
            lowest.A = Math.Min(lowest.A, pixel.A);

            highest.R = Math.Max(highest.R, pixel.R);
            highest.G = Math.Max(highest.G, pixel.G);
            highest.B = Math.Max(highest.B, pixel.B);
            highest.A = Math.Max(highest.A, pixel.A);
        }
    }
}
/// <summary>
/// SSE4.1 path computing the per-channel minimum and maximum colors of both subsets
/// of a 4x4 tile that is split in two by a partition table.
/// </summary>
/// <param name="partitionTable">Subset index (0 or non-zero) of each of the 16 pixels</param>
/// <param name="tile">The 16 packed pixels of the tile, four per row</param>
/// <param name="minColors">Receives the minimum color of subset 0 and subset 1 at indices 0 and 1</param>
/// <param name="maxColors">Receives the maximum color of subset 0 and subset 1 at indices 0 and 1</param>
private static unsafe void GetMinMaxColorsTwoSubsets4x4Sse41(
    ReadOnlySpan<byte> partitionTable,
    ReadOnlySpan<uint> tile,
    Span<RgbaColor8> minColors,
    Span<RgbaColor8> maxColors)
{
    // The raw 128-bit loads below read 16 partition bytes and 16 pixels without bounds checks.
    Debug.Assert(partitionTable.Length >= 16);
    Debug.Assert(tile.Length >= 16);

    Vector128<byte> subsetIds;

    fixed (byte* pPartitionTable = partitionTable)
    {
        subsetIds = Sse2.LoadVector128(pPartitionTable);
    }

    // 0xFF for every pixel that belongs to subset 0, 0x00 otherwise (one byte per pixel).
    Vector128<byte> inSubset0 = Sse2.CompareEqual(subsetIds, Vector128<byte>.Zero);

    // Widen the mask from one byte per pixel to four bytes per pixel (two unpack stages),
    // producing one mask vector per tile row.
    Vector128<short> maskPairsLow = Sse2.UnpackLow(inSubset0, inSubset0).AsInt16();
    Vector128<short> maskPairsHigh = Sse2.UnpackHigh(inSubset0, inSubset0).AsInt16();

    Vector128<byte> rowMask0 = Sse2.UnpackLow(maskPairsLow, maskPairsLow).AsByte();
    Vector128<byte> rowMask1 = Sse2.UnpackHigh(maskPairsLow, maskPairsLow).AsByte();
    Vector128<byte> rowMask2 = Sse2.UnpackLow(maskPairsHigh, maskPairsHigh).AsByte();
    Vector128<byte> rowMask3 = Sse2.UnpackHigh(maskPairsHigh, maskPairsHigh).AsByte();

    Vector128<byte> row0, row1, row2, row3;

    fixed (uint* pTile = tile)
    {
        row0 = Sse2.LoadVector128(pTile).AsByte();
        row1 = Sse2.LoadVector128(pTile + 4).AsByte();
        row2 = Sse2.LoadVector128(pTile + 8).AsByte();
        row3 = Sse2.LoadVector128(pTile + 12).AsByte();
    }

    Vector128<byte> min0 = Vector128<byte>.AllBitsSet;
    Vector128<byte> min1 = Vector128<byte>.AllBitsSet;
    Vector128<byte> max0 = Vector128<byte>.Zero;
    Vector128<byte> max1 = Vector128<byte>.Zero;

    // Subset 0 minimum: pixels outside the subset are replaced by the running minimum,
    // so they cannot lower it.
    min0 = Sse2.Min(min0, Sse41.BlendVariable(min0, row0, rowMask0));
    min0 = Sse2.Min(min0, Sse41.BlendVariable(min0, row1, rowMask1));
    min0 = Sse2.Min(min0, Sse41.BlendVariable(min0, row2, rowMask2));
    min0 = Sse2.Min(min0, Sse41.BlendVariable(min0, row3, rowMask3));

    // Subset 1 minimum: OR with the mask forces subset 0 pixels to 0xFF.
    min1 = Sse2.Min(min1, Sse2.Or(row0, rowMask0));
    min1 = Sse2.Min(min1, Sse2.Or(row1, rowMask1));
    min1 = Sse2.Min(min1, Sse2.Or(row2, rowMask2));
    min1 = Sse2.Min(min1, Sse2.Or(row3, rowMask3));

    // Subset 0 maximum: AND with the mask forces pixels outside the subset to zero.
    max0 = Sse2.Max(max0, Sse2.And(row0, rowMask0));
    max0 = Sse2.Max(max0, Sse2.And(row1, rowMask1));
    max0 = Sse2.Max(max0, Sse2.And(row2, rowMask2));
    max0 = Sse2.Max(max0, Sse2.And(row3, rowMask3));

    // Subset 1 maximum: AND with the inverted mask forces subset 0 pixels to zero.
    max1 = Sse2.Max(max1, Sse2.AndNot(rowMask0, row0));
    max1 = Sse2.Max(max1, Sse2.AndNot(rowMask1, row1));
    max1 = Sse2.Max(max1, Sse2.AndNot(rowMask2, row2));
    max1 = Sse2.Max(max1, Sse2.AndNot(rowMask3, row3));

    // Each accumulator still holds four pixel lanes; fold them into a single color.
    minColors[0] = HorizontalMin(min0);
    minColors[1] = HorizontalMin(min1);
    maxColors[0] = HorizontalMax(max0);
    maxColors[1] = HorizontalMax(max1);
}
colorDepth, - alphaDepth, - alphaMask); - } - else if (indexBitCount == 3) - { - return Select3BitIndicesSse41( - values, - endPoint0, - endPoint1, - pBit0, - pBit1, - indexBitCount, - indexCount, - colorDepth, - alphaDepth, - alphaMask); - } - else if (indexBitCount == 4) - { - return Select4BitIndicesOneSubsetSse41( - values, - endPoint0, - endPoint1, - pBit0, - pBit1, - indexBitCount, - indexCount, - colorDepth, - alphaDepth, - alphaMask); - } - } - - return SelectIndicesFallback( - values, - endPoint0, - endPoint1, - pBit0, - pBit1, - indexBitCount, - indexCount, - colorDepth, - alphaDepth, - alphaMask); - } - - private static unsafe int Select2BitIndicesSse41( - ReadOnlySpan<uint> values, - uint endPoint0, - uint endPoint1, - int pBit0, - int pBit1, - int indexBitCount, - int indexCount, - int colorDepth, - int alphaDepth, - uint alphaMask) - { - uint alphaMaskForPalette = alphaMask; - - if (alphaDepth == 0) - { - alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); - } - - int errorSum = 0; - - RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0); - RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1); - - Vector128<byte> c0Rep = Vector128.Create(c0.ToUInt32() | alphaMaskForPalette).AsByte(); - Vector128<byte> c1Rep = Vector128.Create(c1.ToUInt32() | alphaMaskForPalette).AsByte(); - - Vector128<byte> c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); - - Vector128<byte> rWeights; - Vector128<byte> lWeights; - - fixed (byte* pWeights = BC67Tables.Weights[0], pInvWeights = BC67Tables.InverseWeights[0]) - { - rWeights = Sse2.LoadScalarVector128((uint*)pWeights).AsByte(); - lWeights = Sse2.LoadScalarVector128((uint*)pInvWeights).AsByte(); - } - - Vector128<byte> iWeights = Sse2.UnpackLow(lWeights, rWeights); - Vector128<byte> iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); - Vector128<byte> iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), 
/// <summary>
/// SSE4.1 path that builds the 8-entry palette between two quantized end points and sums,
/// over all pixels, the squared distance to the closest palette entry.
/// </summary>
/// <param name="values">Packed pixels to evaluate</param>
/// <param name="endPoint0">First end point, packed</param>
/// <param name="endPoint1">Second end point, packed</param>
/// <param name="pBit0">P-bit passed to the quantizer for the first end point</param>
/// <param name="pBit1">P-bit passed to the quantizer for the second end point</param>
/// <param name="indexBitCount">Not read by this path</param>
/// <param name="indexCount">Not read by this path</param>
/// <param name="colorDepth">Color depth passed to the quantizer</param>
/// <param name="alphaDepth">Alpha depth passed to the quantizer; 0 forces palette alpha to 255</param>
/// <param name="alphaMask">Bits ORed into every pixel and palette end point before comparison</param>
/// <returns>Sum of the per-pixel minimum errors; each per-pixel error saturates at 65535</returns>
private static unsafe int Select3BitIndicesSse41(
    ReadOnlySpan<uint> values,
    uint endPoint0,
    uint endPoint1,
    int pBit0,
    int pBit1,
    int indexBitCount,
    int indexCount,
    int colorDepth,
    int alphaDepth,
    uint alphaMask)
{
    uint paletteAlphaMask = alphaDepth == 0
        ? alphaMask | new RgbaColor8(0, 0, 0, 255).ToUInt32()
        : alphaMask;

    RgbaColor8 quantized0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0);
    RgbaColor8 quantized1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1);

    // Interleave the end points byte-wise so each channel of end point 0 sits next to
    // the same channel of end point 1.
    Vector128<byte> endPointPairs = Sse2.UnpackLow(
        Vector128.Create(quantized0.ToUInt32() | paletteAlphaMask).AsByte(),
        Vector128.Create(quantized1.ToUInt32() | paletteAlphaMask).AsByte());

    Vector128<byte> weights;
    Vector128<byte> inverseWeights;

    fixed (byte* pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1])
    {
        // Eight weights of one byte each.
        weights = Sse2.LoadScalarVector128((ulong*)pWeights).AsByte();
        inverseWeights = Sse2.LoadScalarVector128((ulong*)pInvWeights).AsByte();
    }

    // (inverse weight, weight) byte pairs, one 16-bit pair per palette entry.
    Vector128<short> weightPairs = Sse2.UnpackLow(inverseWeights, weights).AsInt16();

    // Replicate every pair four times (once per channel), two palette entries per vector.
    Vector128<short> pairs0To3 = Sse2.UnpackLow(weightPairs, weightPairs);
    Vector128<short> pairs4To7 = Sse2.UnpackHigh(weightPairs, weightPairs);

    Vector128<sbyte> weights01 = Sse2.UnpackLow(pairs0To3, pairs0To3).AsSByte();
    Vector128<sbyte> weights23 = Sse2.UnpackHigh(pairs0To3, pairs0To3).AsSByte();
    Vector128<sbyte> weights45 = Sse2.UnpackLow(pairs4To7, pairs4To7).AsSByte();
    Vector128<sbyte> weights67 = Sse2.UnpackHigh(pairs4To7, pairs4To7).AsSByte();

    // Each vector holds two interpolated palette colors as 16-bit channels.
    Vector128<short> palette01 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weights01));
    Vector128<short> palette23 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weights23));
    Vector128<short> palette45 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weights45));
    Vector128<short> palette67 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weights67));

    int totalError = 0;

    foreach (uint value in values)
    {
        // The pixel is broadcast, so the low eight bytes widen to the same color twice.
        Vector128<short> pixel = Sse41.ConvertToVector128Int16(Vector128.Create(value | alphaMask).AsByte());

        Vector128<short> diff01 = Sse2.Subtract(pixel, palette01);
        Vector128<short> diff23 = Sse2.Subtract(pixel, palette23);
        Vector128<short> diff45 = Sse2.Subtract(pixel, palette45);
        Vector128<short> diff67 = Sse2.Subtract(pixel, palette67);

        Vector128<int> squares01 = Sse2.MultiplyAddAdjacent(diff01, diff01);
        Vector128<int> squares23 = Sse2.MultiplyAddAdjacent(diff23, diff23);
        Vector128<int> squares45 = Sse2.MultiplyAddAdjacent(diff45, diff45);
        Vector128<int> squares67 = Sse2.MultiplyAddAdjacent(diff67, diff67);

        // One squared error per palette entry.
        Vector128<int> errors0To3 = Ssse3.HorizontalAdd(squares01, squares23);
        Vector128<int> errors4To7 = Ssse3.HorizontalAdd(squares45, squares67);

        // Narrow to 16 bits (saturating) so a single horizontal minimum covers all eight entries.
        Vector128<ushort> errors = Sse41.PackUnsignedSaturate(errors0To3, errors4To7);

        totalError += Sse41.MinHorizontal(errors).GetElement(0);
    }

    return totalError;
}
Vector128<byte> iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); - Vector128<byte> iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); - Vector128<byte> iWeights2 = Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); - Vector128<byte> iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); - Vector128<byte> iWeights4 = Sse2.UnpackLow(iWeights45.AsInt16(), iWeights45.AsInt16()).AsByte(); - Vector128<byte> iWeights5 = Sse2.UnpackHigh(iWeights45.AsInt16(), iWeights45.AsInt16()).AsByte(); - Vector128<byte> iWeights6 = Sse2.UnpackLow(iWeights67.AsInt16(), iWeights67.AsInt16()).AsByte(); - Vector128<byte> iWeights7 = Sse2.UnpackHigh(iWeights67.AsInt16(), iWeights67.AsInt16()).AsByte(); - - Vector128<short> pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); - Vector128<short> pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); - Vector128<short> pal2 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights2.AsSByte())); - Vector128<short> pal3 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights3.AsSByte())); - Vector128<short> pal4 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights4.AsSByte())); - Vector128<short> pal5 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights5.AsSByte())); - Vector128<short> pal6 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights6.AsSByte())); - Vector128<short> pal7 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights7.AsSByte())); - - for (int i = 0; i < values.Length; i++) - { - uint c = values[i] | alphaMask; - - Vector128<short> color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); - - Vector128<short> delta0 = Sse2.Subtract(color, pal0); - Vector128<short> delta1 = Sse2.Subtract(color, pal1); - Vector128<short> delta2 = Sse2.Subtract(color, pal2); - Vector128<short> delta3 = Sse2.Subtract(color, 
pal3); - Vector128<short> delta4 = Sse2.Subtract(color, pal4); - Vector128<short> delta5 = Sse2.Subtract(color, pal5); - Vector128<short> delta6 = Sse2.Subtract(color, pal6); - Vector128<short> delta7 = Sse2.Subtract(color, pal7); - - Vector128<int> deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); - Vector128<int> deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); - Vector128<int> deltaSum2 = Sse2.MultiplyAddAdjacent(delta2, delta2); - Vector128<int> deltaSum3 = Sse2.MultiplyAddAdjacent(delta3, delta3); - Vector128<int> deltaSum4 = Sse2.MultiplyAddAdjacent(delta4, delta4); - Vector128<int> deltaSum5 = Sse2.MultiplyAddAdjacent(delta5, delta5); - Vector128<int> deltaSum6 = Sse2.MultiplyAddAdjacent(delta6, delta6); - Vector128<int> deltaSum7 = Sse2.MultiplyAddAdjacent(delta7, delta7); - - Vector128<int> deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); - Vector128<int> deltaSum23 = Ssse3.HorizontalAdd(deltaSum2, deltaSum3); - Vector128<int> deltaSum45 = Ssse3.HorizontalAdd(deltaSum4, deltaSum5); - Vector128<int> deltaSum67 = Ssse3.HorizontalAdd(deltaSum6, deltaSum7); - - Vector128<ushort> delta0123 = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum23); - Vector128<ushort> delta4567 = Sse41.PackUnsignedSaturate(deltaSum45, deltaSum67); - - Vector128<ushort> min0123 = Sse41.MinHorizontal(delta0123); - Vector128<ushort> min4567 = Sse41.MinHorizontal(delta4567); - - ushort minPos0123 = min0123.GetElement(0); - ushort minPos4567 = min4567.GetElement(0); - - if (minPos4567 < minPos0123) - { - errorSum += minPos4567; - } - else - { - errorSum += minPos0123; - } - } - - return errorSum; - } - - private static int SelectIndicesFallback( - ReadOnlySpan<uint> values, - uint endPoint0, - uint endPoint1, - int pBit0, - int pBit1, - int indexBitCount, - int indexCount, - int colorDepth, - int alphaDepth, - uint alphaMask) - { - int errorSum = 0; - - uint alphaMaskForPalette = alphaMask; - - if (alphaDepth == 0) - { - alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 
255).ToUInt32(); - } - - Span<uint> palette = stackalloc uint[indexCount]; - - RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0); - RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1); - - Unsafe.As<RgbaColor8, uint>(ref c0) |= alphaMaskForPalette; - Unsafe.As<RgbaColor8, uint>(ref c1) |= alphaMaskForPalette; - - palette[0] = c0.ToUInt32(); - palette[indexCount - 1] = c1.ToUInt32(); - - for (int j = 1; j < indexCount - 1; j++) - { - palette[j] = Interpolate(c0, c1, j, indexBitCount).ToUInt32(); - } - - for (int i = 0; i < values.Length; i++) - { - uint color = values[i] | alphaMask; - - int bestMatchScore = int.MaxValue; - int bestMatchIndex = 0; - - for (int j = 0; j < indexCount; j++) - { - int score = SquaredDifference( - RgbaColor8.FromUInt32(color).GetColor32(), - RgbaColor8.FromUInt32(palette[j]).GetColor32()); - - if (score < bestMatchScore) - { - bestMatchScore = score; - bestMatchIndex = j; - } - } - - errorSum += bestMatchScore; - } - - return errorSum; - } - - public static int SelectIndices( - ReadOnlySpan<uint> tile, - int w, - int h, - ReadOnlySpan<uint> endPoints0, - ReadOnlySpan<uint> endPoints1, - ReadOnlySpan<int> pBitValues, - Span<byte> indices, - int subsetCount, - int partition, - int indexBitCount, - int indexCount, - int colorDepth, - int alphaDepth, - int pBits, - uint alphaMask) - { - if (Sse41.IsSupported) - { - if (indexBitCount == 2) - { - return Select2BitIndicesSse41( - tile, - w, - h, - endPoints0, - endPoints1, - pBitValues, - indices, - subsetCount, - partition, - colorDepth, - alphaDepth, - pBits, - alphaMask); - } - else if (indexBitCount == 3) - { - return Select3BitIndicesSse41( - tile, - w, - h, - endPoints0, - endPoints1, - pBitValues, - indices, - subsetCount, - partition, - colorDepth, - alphaDepth, - pBits, - alphaMask); - } - else if (indexBitCount == 4) - { - Debug.Assert(subsetCount == 1); - - return Select4BitIndicesOneSubsetSse41( - tile, - 
w, - h, - endPoints0[0], - endPoints1[0], - pBitValues, - indices, - partition, - colorDepth, - alphaDepth, - pBits, - alphaMask); - } - } - - return SelectIndicesFallback( - tile, - w, - h, - endPoints0, - endPoints1, - pBitValues, - indices, - subsetCount, - partition, - indexBitCount, - indexCount, - colorDepth, - alphaDepth, - pBits, - alphaMask); - } - - private static unsafe int Select2BitIndicesSse41( - ReadOnlySpan<uint> tile, - int w, - int h, - ReadOnlySpan<uint> endPoints0, - ReadOnlySpan<uint> endPoints1, - ReadOnlySpan<int> pBitValues, - Span<byte> indices, - int subsetCount, - int partition, - int colorDepth, - int alphaDepth, - int pBits, - uint alphaMask) - { - byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; - - uint alphaMaskForPalette = alphaMask; - - if (alphaDepth == 0) - { - alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); - } - - int errorSum = 0; - - for (int subset = 0; subset < subsetCount; subset++) - { - int pBit0 = -1, pBit1 = -1; - - if (pBits == subsetCount) - { - pBit0 = pBit1 = pBitValues[subset]; - } - else if (pBits != 0) - { - pBit0 = pBitValues[subset * 2]; - pBit1 = pBitValues[subset * 2 + 1]; - } - - RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoints0[subset]), colorDepth, alphaDepth, pBit0); - RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoints1[subset]), colorDepth, alphaDepth, pBit1); - - Vector128<byte> c0Rep = Vector128.Create(c0.ToUInt32() | alphaMaskForPalette).AsByte(); - Vector128<byte> c1Rep = Vector128.Create(c1.ToUInt32() | alphaMaskForPalette).AsByte(); - - Vector128<byte> c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); - - Vector128<byte> rWeights; - Vector128<byte> lWeights; - - fixed (byte* pWeights = BC67Tables.Weights[0], pInvWeights = BC67Tables.InverseWeights[0]) - { - rWeights = Sse2.LoadScalarVector128((uint*)pWeights).AsByte(); - lWeights = Sse2.LoadScalarVector128((uint*)pInvWeights).AsByte(); - } - - Vector128<byte> iWeights = Sse2.UnpackLow(lWeights, 
rWeights); - Vector128<byte> iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); - Vector128<byte> iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); - Vector128<byte> iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); - - Vector128<short> pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); - Vector128<short> pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); - - int i = 0; - for (int ty = 0; ty < h; ty++) - { - for (int tx = 0; tx < w; tx++, i++) - { - int tileOffset = ty * 4 + tx; - if (partitionTable[tileOffset] != subset) - { - continue; - } - - uint c = tile[i] | alphaMask; - - Vector128<short> color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); - - Vector128<short> delta0 = Sse2.Subtract(color, pal0); - Vector128<short> delta1 = Sse2.Subtract(color, pal1); - - Vector128<int> deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); - Vector128<int> deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); - - Vector128<int> deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); - - Vector128<ushort> delta = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum01); - - Vector128<ushort> min = Sse41.MinHorizontal(delta); - - uint minPos = min.AsUInt32().GetElement(0); - ushort error = (ushort)minPos; - uint index = minPos >> 16; - - indices[tileOffset] = (byte)index; - errorSum += error; - } - } - } - - return errorSum; - } - - private static unsafe int Select3BitIndicesSse41( - ReadOnlySpan<uint> tile, - int w, - int h, - ReadOnlySpan<uint> endPoints0, - ReadOnlySpan<uint> endPoints1, - ReadOnlySpan<int> pBitValues, - Span<byte> indices, - int subsetCount, - int partition, - int colorDepth, - int alphaDepth, - int pBits, - uint alphaMask) - { - byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; - - uint alphaMaskForPalette = alphaMask; - - if (alphaDepth == 0) - { - 
alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); - } - - int errorSum = 0; - - for (int subset = 0; subset < subsetCount; subset++) - { - int pBit0 = -1, pBit1 = -1; - - if (pBits == subsetCount) - { - pBit0 = pBit1 = pBitValues[subset]; - } - else if (pBits != 0) - { - pBit0 = pBitValues[subset * 2]; - pBit1 = pBitValues[subset * 2 + 1]; - } - - RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoints0[subset]), colorDepth, alphaDepth, pBit0); - RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoints1[subset]), colorDepth, alphaDepth, pBit1); - - Vector128<byte> c0Rep = Vector128.Create(c0.ToUInt32() | alphaMaskForPalette).AsByte(); - Vector128<byte> c1Rep = Vector128.Create(c1.ToUInt32() | alphaMaskForPalette).AsByte(); - - Vector128<byte> c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); - - Vector128<byte> rWeights; - Vector128<byte> lWeights; - - fixed (byte* pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1]) - { - rWeights = Sse2.LoadScalarVector128((ulong*)pWeights).AsByte(); - lWeights = Sse2.LoadScalarVector128((ulong*)pInvWeights).AsByte(); - } - - Vector128<byte> iWeights = Sse2.UnpackLow(lWeights, rWeights); - Vector128<byte> iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); - Vector128<byte> iWeights23 = Sse2.UnpackHigh(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); - Vector128<byte> iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); - Vector128<byte> iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); - Vector128<byte> iWeights2 = Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); - Vector128<byte> iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); - - Vector128<short> pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); - Vector128<short> pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); - Vector128<short> pal2 = 
ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights2.AsSByte())); - Vector128<short> pal3 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights3.AsSByte())); - - int i = 0; - for (int ty = 0; ty < h; ty++) - { - for (int tx = 0; tx < w; tx++, i++) - { - int tileOffset = ty * 4 + tx; - if (partitionTable[tileOffset] != subset) - { - continue; - } - - uint c = tile[i] | alphaMask; - - Vector128<short> color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); - - Vector128<short> delta0 = Sse2.Subtract(color, pal0); - Vector128<short> delta1 = Sse2.Subtract(color, pal1); - Vector128<short> delta2 = Sse2.Subtract(color, pal2); - Vector128<short> delta3 = Sse2.Subtract(color, pal3); - - Vector128<int> deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); - Vector128<int> deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); - Vector128<int> deltaSum2 = Sse2.MultiplyAddAdjacent(delta2, delta2); - Vector128<int> deltaSum3 = Sse2.MultiplyAddAdjacent(delta3, delta3); - - Vector128<int> deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); - Vector128<int> deltaSum23 = Ssse3.HorizontalAdd(deltaSum2, deltaSum3); - - Vector128<ushort> delta = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum23); - - Vector128<ushort> min = Sse41.MinHorizontal(delta); - - uint minPos = min.AsUInt32().GetElement(0); - ushort error = (ushort)minPos; - uint index = minPos >> 16; - - indices[tileOffset] = (byte)index; - errorSum += error; - } - } - } - - return errorSum; - } - - private static unsafe int Select4BitIndicesOneSubsetSse41( - ReadOnlySpan<uint> tile, - int w, - int h, - uint endPoint0, - uint endPoint1, - ReadOnlySpan<int> pBitValues, - Span<byte> indices, - int partition, - int colorDepth, - int alphaDepth, - int pBits, - uint alphaMask) - { - uint alphaMaskForPalette = alphaMask; - - if (alphaDepth == 0) - { - alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); - } - - int errorSum = 0; - - int pBit0 = -1, pBit1 = -1; - - if (pBits != 0) - { - 
pBit0 = pBitValues[0]; - pBit1 = pBitValues[1]; - } - - RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0); - RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1); - - Vector128<byte> c0Rep = Vector128.Create(c0.ToUInt32() | alphaMaskForPalette).AsByte(); - Vector128<byte> c1Rep = Vector128.Create(c1.ToUInt32() | alphaMaskForPalette).AsByte(); - - Vector128<byte> c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); - - Vector128<byte> rWeights; - Vector128<byte> lWeights; - - fixed (byte* pWeights = BC67Tables.Weights[2], pInvWeights = BC67Tables.InverseWeights[2]) - { - rWeights = Sse2.LoadVector128(pWeights); - lWeights = Sse2.LoadVector128(pInvWeights); - } - - Vector128<byte> iWeightsLow = Sse2.UnpackLow(lWeights, rWeights); - Vector128<byte> iWeightsHigh = Sse2.UnpackHigh(lWeights, rWeights); - Vector128<byte> iWeights01 = Sse2.UnpackLow(iWeightsLow.AsInt16(), iWeightsLow.AsInt16()).AsByte(); - Vector128<byte> iWeights23 = Sse2.UnpackHigh(iWeightsLow.AsInt16(), iWeightsLow.AsInt16()).AsByte(); - Vector128<byte> iWeights45 = Sse2.UnpackLow(iWeightsHigh.AsInt16(), iWeightsHigh.AsInt16()).AsByte(); - Vector128<byte> iWeights67 = Sse2.UnpackHigh(iWeightsHigh.AsInt16(), iWeightsHigh.AsInt16()).AsByte(); - Vector128<byte> iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); - Vector128<byte> iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); - Vector128<byte> iWeights2 = Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); - Vector128<byte> iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); - Vector128<byte> iWeights4 = Sse2.UnpackLow(iWeights45.AsInt16(), iWeights45.AsInt16()).AsByte(); - Vector128<byte> iWeights5 = Sse2.UnpackHigh(iWeights45.AsInt16(), iWeights45.AsInt16()).AsByte(); - Vector128<byte> iWeights6 = Sse2.UnpackLow(iWeights67.AsInt16(), iWeights67.AsInt16()).AsByte(); - Vector128<byte> 
iWeights7 = Sse2.UnpackHigh(iWeights67.AsInt16(), iWeights67.AsInt16()).AsByte(); - - Vector128<short> pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); - Vector128<short> pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); - Vector128<short> pal2 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights2.AsSByte())); - Vector128<short> pal3 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights3.AsSByte())); - Vector128<short> pal4 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights4.AsSByte())); - Vector128<short> pal5 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights5.AsSByte())); - Vector128<short> pal6 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights6.AsSByte())); - Vector128<short> pal7 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights7.AsSByte())); - - int i = 0; - for (int ty = 0; ty < h; ty++) - { - for (int tx = 0; tx < w; tx++, i++) - { - uint c = tile[i] | alphaMask; - - Vector128<short> color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); - - Vector128<short> delta0 = Sse2.Subtract(color, pal0); - Vector128<short> delta1 = Sse2.Subtract(color, pal1); - Vector128<short> delta2 = Sse2.Subtract(color, pal2); - Vector128<short> delta3 = Sse2.Subtract(color, pal3); - Vector128<short> delta4 = Sse2.Subtract(color, pal4); - Vector128<short> delta5 = Sse2.Subtract(color, pal5); - Vector128<short> delta6 = Sse2.Subtract(color, pal6); - Vector128<short> delta7 = Sse2.Subtract(color, pal7); - - Vector128<int> deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); - Vector128<int> deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); - Vector128<int> deltaSum2 = Sse2.MultiplyAddAdjacent(delta2, delta2); - Vector128<int> deltaSum3 = Sse2.MultiplyAddAdjacent(delta3, delta3); - Vector128<int> deltaSum4 = Sse2.MultiplyAddAdjacent(delta4, delta4); - Vector128<int> deltaSum5 = Sse2.MultiplyAddAdjacent(delta5, delta5); - 
Vector128<int> deltaSum6 = Sse2.MultiplyAddAdjacent(delta6, delta6); - Vector128<int> deltaSum7 = Sse2.MultiplyAddAdjacent(delta7, delta7); - - Vector128<int> deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); - Vector128<int> deltaSum23 = Ssse3.HorizontalAdd(deltaSum2, deltaSum3); - Vector128<int> deltaSum45 = Ssse3.HorizontalAdd(deltaSum4, deltaSum5); - Vector128<int> deltaSum67 = Ssse3.HorizontalAdd(deltaSum6, deltaSum7); - - Vector128<ushort> delta0123 = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum23); - Vector128<ushort> delta4567 = Sse41.PackUnsignedSaturate(deltaSum45, deltaSum67); - - Vector128<ushort> min0123 = Sse41.MinHorizontal(delta0123); - Vector128<ushort> min4567 = Sse41.MinHorizontal(delta4567); - - uint minPos0123 = min0123.AsUInt32().GetElement(0); - uint minPos4567 = min4567.AsUInt32().GetElement(0); - - if ((ushort)minPos4567 < (ushort)minPos0123) - { - errorSum += (ushort)minPos4567; - indices[ty * 4 + tx] = (byte)(8 + (minPos4567 >> 16)); - } - else - { - errorSum += (ushort)minPos0123; - indices[ty * 4 + tx] = (byte)(minPos0123 >> 16); - } - } - } - - return errorSum; - } - - private static Vector128<short> ShiftRoundToNearest(Vector128<short> x) - { - return Sse2.ShiftRightLogical(Sse2.Add(x, Vector128.Create((short)32)), 6); - } - - private static int SelectIndicesFallback( - ReadOnlySpan<uint> tile, - int w, - int h, - ReadOnlySpan<uint> endPoints0, - ReadOnlySpan<uint> endPoints1, - ReadOnlySpan<int> pBitValues, - Span<byte> indices, - int subsetCount, - int partition, - int indexBitCount, - int indexCount, - int colorDepth, - int alphaDepth, - int pBits, - uint alphaMask) - { - int errorSum = 0; - - uint alphaMaskForPalette = alphaMask; - - if (alphaDepth == 0) - { - alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); - } - - Span<uint> palette = stackalloc uint[subsetCount * indexCount]; - - for (int subset = 0; subset < subsetCount; subset++) - { - int palBase = subset * indexCount; - - int pBit0 = -1, pBit1 = -1; 
- - if (pBits == subsetCount) - { - pBit0 = pBit1 = pBitValues[subset]; - } - else if (pBits != 0) - { - pBit0 = pBitValues[subset * 2]; - pBit1 = pBitValues[subset * 2 + 1]; - } - - RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoints0[subset]), colorDepth, alphaDepth, pBit0); - RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoints1[subset]), colorDepth, alphaDepth, pBit1); - - Unsafe.As<RgbaColor8, uint>(ref c0) |= alphaMaskForPalette; - Unsafe.As<RgbaColor8, uint>(ref c1) |= alphaMaskForPalette; - - palette[palBase + 0] = c0.ToUInt32(); - palette[palBase + indexCount - 1] = c1.ToUInt32(); - - for (int j = 1; j < indexCount - 1; j++) - { - palette[palBase + j] = Interpolate(c0, c1, j, indexBitCount).ToUInt32(); - } - } - - int i = 0; - for (int ty = 0; ty < h; ty++) - { - for (int tx = 0; tx < w; tx++) - { - int subset = BC67Tables.PartitionTable[subsetCount - 1][partition][ty * 4 + tx]; - uint color = tile[i++] | alphaMask; - - int bestMatchScore = int.MaxValue; - int bestMatchIndex = 0; - - for (int j = 0; j < indexCount; j++) - { - int score = SquaredDifference( - RgbaColor8.FromUInt32(color).GetColor32(), - RgbaColor8.FromUInt32(palette[subset * indexCount + j]).GetColor32()); - - if (score < bestMatchScore) - { - bestMatchScore = score; - bestMatchIndex = j; - } - } - - indices[ty * 4 + tx] = (byte)bestMatchIndex; - errorSum += bestMatchScore; - } - } - - return errorSum; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int SquaredDifference(RgbaColor32 color1, RgbaColor32 color2) - { - RgbaColor32 delta = color1 - color2; - return RgbaColor32.Dot(delta, delta); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static RgbaColor8 Interpolate(RgbaColor8 color1, RgbaColor8 color2, int weightIndex, int indexBitCount) - { - return Interpolate(color1.GetColor32(), color2.GetColor32(), weightIndex, indexBitCount).GetColor8(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static RgbaColor32 
Interpolate(RgbaColor32 color1, RgbaColor32 color2, int weightIndex, int indexBitCount) - { - Debug.Assert(indexBitCount >= 2 && indexBitCount <= 4); - - int weight = (((weightIndex << 7) / ((1 << indexBitCount) - 1)) + 1) >> 1; - - RgbaColor32 weightV = new RgbaColor32(weight); - RgbaColor32 invWeightV = new RgbaColor32(64 - weight); - - return (color1 * invWeightV + color2 * weightV + new RgbaColor32(32)) >> 6; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static RgbaColor32 Interpolate( - RgbaColor32 color1, - RgbaColor32 color2, - int colorWeightIndex, - int alphaWeightIndex, - int colorIndexBitCount, - int alphaIndexBitCount) - { - Debug.Assert(colorIndexBitCount >= 2 && colorIndexBitCount <= 4); - Debug.Assert(alphaIndexBitCount >= 2 && alphaIndexBitCount <= 4); - - int colorWeight = BC67Tables.Weights[colorIndexBitCount - 2][colorWeightIndex]; - int alphaWeight = BC67Tables.Weights[alphaIndexBitCount - 2][alphaWeightIndex]; - - RgbaColor32 weightV = new RgbaColor32(colorWeight); - weightV.A = alphaWeight; - RgbaColor32 invWeightV = new RgbaColor32(64) - weightV; - - return (color1 * invWeightV + color2 * weightV + new RgbaColor32(32)) >> 6; - } - - public static RgbaColor8 Quantize(RgbaColor8 color, int colorBits, int alphaBits, int pBit = -1) - { - if (alphaBits == 0) - { - int colorShift = 8 - colorBits; - - uint c; - - if (pBit >= 0) - { - byte[] lutColor = _quantizationLut[colorBits - 4]; - - Debug.Assert(pBit <= 1); - int high = pBit << 8; - uint mask = (0xffu >> (colorBits + 1)) * 0x10101; - - c = lutColor[color.R | high]; - c |= (uint)lutColor[color.G | high] << 8; - c |= (uint)lutColor[color.B | high] << 16; - - c <<= colorShift; - c |= (c >> (colorBits + 1)) & mask; - c |= ((uint)pBit * 0x10101) << (colorShift - 1); - } - else - { - byte[] lutColor = _quantizationLutNoPBit[colorBits - 4]; - - uint mask = (0xffu >> colorBits) * 0x10101; - - c = lutColor[color.R]; - c |= (uint)lutColor[color.G] << 8; - c |= 
(uint)lutColor[color.B] << 16; - - c <<= colorShift; - c |= (c >> colorBits) & mask; - } - - c |= (uint)color.A << 24; - - return RgbaColor8.FromUInt32(c); - } - - return QuantizeFallback(color, colorBits, alphaBits, pBit); - } - - private static RgbaColor8 QuantizeFallback(RgbaColor8 color, int colorBits, int alphaBits, int pBit = -1) - { - byte r = UnquantizeComponent(QuantizeComponent(color.R, colorBits, pBit), colorBits, pBit); - byte g = UnquantizeComponent(QuantizeComponent(color.G, colorBits, pBit), colorBits, pBit); - byte b = UnquantizeComponent(QuantizeComponent(color.B, colorBits, pBit), colorBits, pBit); - byte a = alphaBits == 0 ? color.A : UnquantizeComponent(QuantizeComponent(color.A, alphaBits, pBit), alphaBits, pBit); - return new RgbaColor8(r, g, b, a); - } - - public static byte QuantizeComponent(byte component, int bits, int pBit = -1) - { - return pBit >= 0 ? _quantizationLut[bits - 4][component | (pBit << 8)] : _quantizationLutNoPBit[bits - 4][component]; - } - - private static byte QuantizeComponentForLut(byte component, int bits, int pBit = -1) - { - int shift = 8 - bits; - int fill = component >> bits; - - if (pBit >= 0) - { - Debug.Assert(pBit <= 1); - fill >>= 1; - fill |= pBit << (shift - 1); - } - - int q1 = component >> shift; - int q2 = Math.Max(q1 - 1, 0); - int q3 = Math.Min(q1 + 1, (1 << bits) - 1); - - int delta1 = FastAbs(((q1 << shift) | fill) - component); - int delta2 = component - ((q2 << shift) | fill); - int delta3 = ((q3 << shift) | fill) - component; - - if (delta1 < delta2 && delta1 < delta3) - { - return (byte)q1; - } - else if (delta2 < delta3) - { - return (byte)q2; - } - else - { - return (byte)q3; - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int FastAbs(int x) - { - int sign = x >> 31; - return (x + sign) ^ sign; - } - - private static byte UnquantizeComponent(byte component, int bits, int pBit) - { - int shift = 8 - bits; - int value = component << shift; - - if (pBit >= 0) - { - 
Debug.Assert(pBit <= 1); - value |= value >> (bits + 1); - value |= pBit << (shift - 1); - } - else - { - value |= value >> bits; - } - - return (byte)value; - } - } -} diff --git a/Ryujinx.Graphics.Texture/Utils/BC7ModeInfo.cs b/Ryujinx.Graphics.Texture/Utils/BC7ModeInfo.cs deleted file mode 100644 index 687df22c..00000000 --- a/Ryujinx.Graphics.Texture/Utils/BC7ModeInfo.cs +++ /dev/null @@ -1,37 +0,0 @@ -namespace Ryujinx.Graphics.Texture.Utils -{ - readonly struct BC7ModeInfo - { - public readonly int SubsetCount; - public readonly int PartitionBitCount; - public readonly int PBits; - public readonly int RotationBitCount; - public readonly int IndexModeBitCount; - public readonly int ColorIndexBitCount; - public readonly int AlphaIndexBitCount; - public readonly int ColorDepth; - public readonly int AlphaDepth; - - public BC7ModeInfo( - int subsetCount, - int partitionBitsCount, - int pBits, - int rotationBitCount, - int indexModeBitCount, - int colorIndexBitCount, - int alphaIndexBitCount, - int colorDepth, - int alphaDepth) - { - SubsetCount = subsetCount; - PartitionBitCount = partitionBitsCount; - PBits = pBits; - RotationBitCount = rotationBitCount; - IndexModeBitCount = indexModeBitCount; - ColorIndexBitCount = colorIndexBitCount; - AlphaIndexBitCount = alphaIndexBitCount; - ColorDepth = colorDepth; - AlphaDepth = alphaDepth; - } - } -}
// Ryujinx.Graphics.Texture/Utils/Block.cs
namespace Ryujinx.Graphics.Texture.Utils
{
    /// <summary>
    /// A 128-bit bit field stored as two 64-bit halves. Bit positions 0-63 live in
    /// <see cref="Low"/>, bit positions 64-127 live in <see cref="High"/>.
    /// </summary>
    struct Block
    {
        public ulong Low;
        public ulong High;

        /// <summary>
        /// ORs <paramref name="value"/> into the block starting at bit position
        /// <paramref name="offset"/>, then advances <paramref name="offset"/> by <paramref name="bits"/>.
        /// </summary>
        /// <param name="value">Bits to write. The value is not masked to <paramref name="bits"/> here.</param>
        /// <param name="offset">Bit position to write at; incremented by <paramref name="bits"/> on return.</param>
        /// <param name="bits">Width of the field being written.</param>
        public void Encode(ulong value, ref int offset, int bits)
        {
            int start = offset;
            offset = start + bits;

            // Field lies entirely in the upper half.
            if (start >= 64)
            {
                High |= value << (start - 64);
                return;
            }

            Low |= value << start;

            // Field straddles the two halves: the bits that did not fit in Low go to the bottom of High.
            if (start + bits > 64)
            {
                High |= value >> (64 - start);
            }
        }

        /// <summary>
        /// Reads a <paramref name="bits"/>-wide field starting at bit position
        /// <paramref name="offset"/>, then advances <paramref name="offset"/> by <paramref name="bits"/>.
        /// </summary>
        /// <param name="offset">Bit position to read from; incremented by <paramref name="bits"/> on return.</param>
        /// <param name="bits">Width of the field being read.</param>
        /// <returns>The field value, masked to <paramref name="bits"/> bits.</returns>
        public ulong Decode(ref int offset, int bits)
        {
            int start = offset;
            offset = start + bits;

            // A shift by 64 would wrap to a shift by 0, so the full-width mask is special-cased.
            ulong mask = bits == 64 ? ulong.MaxValue : (1UL << bits) - 1;

            // Field lies entirely in the upper half.
            if (start >= 64)
            {
                return (High >> (start - 64)) & mask;
            }

            ulong result = Low >> start;

            // Field straddles the two halves: pull the missing top bits from the bottom of High.
            if (start + bits > 64)
            {
                result |= High << (64 - start);
            }

            return result & mask;
        }
    }
}
// Contents of Ryujinx.Graphics.Texture/Utils/RgbaColor32.cs and
// Ryujinx.Graphics.Texture/Utils/RgbaColor8.cs. Both files declare the same
// using directives, so they are listed once here.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

// Ryujinx.Graphics.Texture/Utils/RgbaColor32.cs
namespace Ryujinx.Graphics.Texture.Utils
{
    /// <summary>
    /// RGBA color with one signed 32-bit integer per channel, held in a single
    /// <see cref="Vector128{T}"/> (lane 0 = R, 1 = G, 2 = B, 3 = A).
    /// Operators use x86 SIMD intrinsics when available and fall back to per-channel scalar code otherwise.
    /// </summary>
    struct RgbaColor32 : IEquatable<RgbaColor32>
    {
        private Vector128<int> _color;

        /// <summary>Red channel (lane 0).</summary>
        public int R
        {
            get => _color.GetElement(0);
            set => _color = _color.WithElement(0, value);
        }

        /// <summary>Green channel (lane 1).</summary>
        public int G
        {
            get => _color.GetElement(1);
            set => _color = _color.WithElement(1, value);
        }

        /// <summary>Blue channel (lane 2).</summary>
        public int B
        {
            get => _color.GetElement(2);
            set => _color = _color.WithElement(2, value);
        }

        /// <summary>Alpha channel (lane 3).</summary>
        public int A
        {
            get => _color.GetElement(3);
            set => _color = _color.WithElement(3, value);
        }

        /// <summary>Wraps an existing vector whose lanes are R, G, B, A in that order.</summary>
        public RgbaColor32(Vector128<int> color)
        {
            _color = color;
        }

        /// <summary>Creates a color from its four channel values.</summary>
        public RgbaColor32(int r, int g, int b, int a)
        {
            _color = Vector128.Create(r, g, b, a);
        }

        /// <summary>Creates a color with all four channels set to <paramref name="scalar"/>.</summary>
        public RgbaColor32(int scalar)
        {
            _color = Vector128.Create(scalar);
        }

        /// <summary>Per-channel addition.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static RgbaColor32 operator +(RgbaColor32 x, RgbaColor32 y)
        {
            if (Sse2.IsSupported)
            {
                return new RgbaColor32(Sse2.Add(x._color, y._color));
            }
            else
            {
                return new RgbaColor32(x.R + y.R, x.G + y.G, x.B + y.B, x.A + y.A);
            }
        }

        /// <summary>Per-channel subtraction.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static RgbaColor32 operator -(RgbaColor32 x, RgbaColor32 y)
        {
            if (Sse2.IsSupported)
            {
                return new RgbaColor32(Sse2.Subtract(x._color, y._color));
            }
            else
            {
                return new RgbaColor32(x.R - y.R, x.G - y.G, x.B - y.B, x.A - y.A);
            }
        }

        /// <summary>Per-channel multiplication, keeping the low 32 bits of each product.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static RgbaColor32 operator *(RgbaColor32 x, RgbaColor32 y)
        {
            if (Sse41.IsSupported)
            {
                return new RgbaColor32(Sse41.MultiplyLow(x._color, y._color));
            }
            else
            {
                return new RgbaColor32(x.R * y.R, x.G * y.G, x.B * y.B, x.A * y.A);
            }
        }

        /// <summary>
        /// Per-channel integer division. Throws <see cref="DivideByZeroException"/> if any channel of
        /// <paramref name="y"/> is zero; use <see cref="DivideGuarded(RgbaColor32, RgbaColor32, int)"/> to avoid that.
        /// </summary>
        public static RgbaColor32 operator /(RgbaColor32 x, RgbaColor32 y)
        {
            return new RgbaColor32(x.R / y.R, x.G / y.G, x.B / y.B, x.A / y.A);
        }

        /// <summary>
        /// Per-channel integer division that yields <paramref name="resultIfZero"/> for every channel
        /// whose divisor is zero instead of throwing.
        /// </summary>
        public static RgbaColor32 DivideGuarded(RgbaColor32 x, RgbaColor32 y, int resultIfZero)
        {
            return new RgbaColor32(
                DivideGuarded(x.R, y.R, resultIfZero),
                DivideGuarded(x.G, y.G, resultIfZero),
                DivideGuarded(x.B, y.B, resultIfZero),
                DivideGuarded(x.A, y.A, resultIfZero));
        }

        /// <summary>Per-channel left shift by <paramref name="shift"/> bits.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static RgbaColor32 operator <<(RgbaColor32 x, int shift)
        {
            if (Sse2.IsSupported)
            {
                return new RgbaColor32(Sse2.ShiftLeftLogical(x._color, (byte)shift));
            }
            else
            {
                return new RgbaColor32(x.R << shift, x.G << shift, x.B << shift, x.A << shift);
            }
        }

        /// <summary>Per-channel right shift by <paramref name="shift"/> bits.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static RgbaColor32 operator >>(RgbaColor32 x, int shift)
        {
            // NOTE(review): the SSE2 path is a logical shift while the scalar fallback below is an
            // arithmetic shift on int, so the two paths only agree for non-negative channels.
            // Left as-is because it is unclear which behavior callers with negative channels expect.
            if (Sse2.IsSupported)
            {
                return new RgbaColor32(Sse2.ShiftRightLogical(x._color, (byte)shift));
            }
            else
            {
                return new RgbaColor32(x.R >> shift, x.G >> shift, x.B >> shift, x.A >> shift);
            }
        }

        public static bool operator ==(RgbaColor32 x, RgbaColor32 y)
        {
            return x.Equals(y);
        }

        public static bool operator !=(RgbaColor32 x, RgbaColor32 y)
        {
            return !x.Equals(y);
        }

        /// <summary>Four-channel dot product: R*R' + G*G' + B*B' + A*A'.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static int Dot(RgbaColor32 x, RgbaColor32 y)
        {
            if (Sse41.IsSupported)
            {
                Vector128<int> product = Sse41.MultiplyLow(x._color, y._color);

                // Two horizontal adds fold the four products into lane 0.
                Vector128<int> sum = Ssse3.HorizontalAdd(product, product);
                sum = Ssse3.HorizontalAdd(sum, sum);
                return sum.GetElement(0);
            }
            else
            {
                return x.R * y.R + x.G * y.G + x.B * y.B + x.A * y.A;
            }
        }

        /// <summary>Per-channel maximum.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static RgbaColor32 Max(RgbaColor32 x, RgbaColor32 y)
        {
            if (Sse41.IsSupported)
            {
                return new RgbaColor32(Sse41.Max(x._color, y._color));
            }
            else
            {
                return new RgbaColor32(Math.Max(x.R, y.R), Math.Max(x.G, y.G), Math.Max(x.B, y.B), Math.Max(x.A, y.A));
            }
        }

        /// <summary>Per-channel minimum.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static RgbaColor32 Min(RgbaColor32 x, RgbaColor32 y)
        {
            if (Sse41.IsSupported)
            {
                return new RgbaColor32(Sse41.Min(x._color, y._color));
            }
            else
            {
                return new RgbaColor32(Math.Min(x.R, y.R), Math.Min(x.G, y.G), Math.Min(x.B, y.B), Math.Min(x.A, y.A));
            }
        }

        /// <summary>
        /// Converts to an 8-bit-per-channel color, clamping every channel to the range 0-255.
        /// </summary>
        /// <returns>The clamped color.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public RgbaColor8 GetColor8()
        {
            if (Sse2.IsSupported)
            {
                // Narrow int -> short with SIGNED saturation first, then short -> byte with unsigned
                // saturation. The second pack interprets its input as signed 16-bit, so an unsigned
                // first pack would turn any channel >= 32768 into a negative short and clamp it to 0
                // instead of 255. This sequence matches ClampByte for every int value.
                Vector128<short> color16 = Sse2.PackSignedSaturate(_color, _color);
                Vector128<byte> color8 = Sse2.PackUnsignedSaturate(color16, color16);
                uint color = color8.AsUInt32().GetElement(0);
                return Unsafe.As<uint, RgbaColor8>(ref color);
            }
            else
            {
                return new RgbaColor8(ClampByte(R), ClampByte(G), ClampByte(B), ClampByte(A));
            }
        }

        // Scalar division that substitutes resultIfZero when the divisor is zero.
        private static int DivideGuarded(int dividend, int divisor, int resultIfZero)
        {
            if (divisor == 0)
            {
                return resultIfZero;
            }

            return dividend / divisor;
        }

        // Clamps an int to the 0-255 range of a byte.
        private static byte ClampByte(int value)
        {
            return (byte)Math.Clamp(value, 0, 255);
        }

        public override int GetHashCode()
        {
            return HashCode.Combine(R, G, B, A);
        }

        public override bool Equals(object obj)
        {
            return obj is RgbaColor32 other && Equals(other);
        }

        public bool Equals(RgbaColor32 other)
        {
            return _color.Equals(other._color);
        }
    }
}

// Ryujinx.Graphics.Texture/Utils/RgbaColor8.cs
namespace Ryujinx.Graphics.Texture.Utils
{
    /// <summary>
    /// RGBA color with one byte per channel. The fields are declared in R, G, B, A order and the
    /// struct is reinterpreted as a <see cref="uint"/> by <see cref="FromUInt32"/> and
    /// <see cref="ToUInt32"/>, so the packed layout follows the platform's byte order.
    /// </summary>
    struct RgbaColor8 : IEquatable<RgbaColor8>
    {
        public byte R;
        public byte G;
        public byte B;
        public byte A;

        /// <summary>Creates a color from its four channel values.</summary>
        public RgbaColor8(byte r, byte g, byte b, byte a)
        {
            R = r;
            G = g;
            B = b;
            A = a;
        }

        /// <summary>Reinterprets the four bytes of <paramref name="color"/> as a color.</summary>
        public static RgbaColor8 FromUInt32(uint color)
        {
            return Unsafe.As<uint, RgbaColor8>(ref color);
        }

        public static bool operator ==(RgbaColor8 x, RgbaColor8 y)
        {
            return x.Equals(y);
        }

        public static bool operator !=(RgbaColor8 x, RgbaColor8 y)
        {
            return !x.Equals(y);
        }

        /// <summary>Widens each channel to a 32-bit integer.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public RgbaColor32 GetColor32()
        {
            if (Sse41.IsSupported)
            {
                // Load the four packed bytes into the low lane and zero-extend each one to 32 bits.
                Vector128<byte> color = Vector128.CreateScalarUnsafe(Unsafe.As<RgbaColor8, uint>(ref this)).AsByte();
                return new RgbaColor32(Sse41.ConvertToVector128Int32(color));
            }
            else
            {
                return new RgbaColor32(R, G, B, A);
            }
        }

        /// <summary>Reinterprets the four channel bytes as a single <see cref="uint"/>.</summary>
        public uint ToUInt32()
        {
            return Unsafe.As<RgbaColor8, uint>(ref this);
        }

        public override int GetHashCode()
        {
            return HashCode.Combine(R, G, B, A);
        }

        public override bool Equals(object obj)
        {
            return obj is RgbaColor8 other && Equals(other);
        }

        public bool Equals(RgbaColor8 other)
        {
            return R == other.R && G == other.G && B == other.B && A == other.A;
        }

        /// <summary>Returns a channel by index: 0 = R, 1 = G, 2 = B, 3 = A.</summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is not in 0-3.</exception>
        public byte GetComponent(int index)
        {
            return index switch
            {
                0 => R,
                1 => G,
                2 => B,
                3 => A,
                _ => throw new ArgumentOutOfRangeException(nameof(index))
            };
        }
    }
}
