diff options
Diffstat (limited to 'Ryujinx.Graphics.Nvdec.Vp9/Idct.cs')
| -rw-r--r-- | Ryujinx.Graphics.Nvdec.Vp9/Idct.cs | 536 |
1 files changed, 536 insertions, 0 deletions
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Idct.cs b/Ryujinx.Graphics.Nvdec.Vp9/Idct.cs new file mode 100644 index 00000000..9fa5842a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Idct.cs @@ -0,0 +1,536 @@ +using Ryujinx.Graphics.Nvdec.Vp9.Common; +using Ryujinx.Graphics.Nvdec.Vp9.Types; +using System; +using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal static class Idct + { + private delegate void Transform1D(ReadOnlySpan<int> input, Span<int> output); + private delegate void HighbdTransform1D(ReadOnlySpan<int> input, Span<int> output, int bd); + + private struct Transform2D + { + public Transform1D Cols, Rows; // Vertical and horizontal + + public Transform2D(Transform1D cols, Transform1D rows) + { + Cols = cols; + Rows = rows; + } + } + + private struct HighbdTransform2D + { + public HighbdTransform1D Cols, Rows; // Vertical and horizontal + + public HighbdTransform2D(HighbdTransform1D cols, HighbdTransform1D rows) + { + Cols = cols; + Rows = rows; + } + } + + private static readonly Transform2D[] Iht4 = new Transform2D[] + { + new Transform2D(Idct4, Idct4), // DCT_DCT = 0 + new Transform2D(Iadst4, Idct4), // ADST_DCT = 1 + new Transform2D(Idct4, Iadst4), // DCT_ADST = 2 + new Transform2D(Iadst4, Iadst4) // ADST_ADST = 3 + }; + + public static void Iht4x416Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType) + { + int i, j; + Span<int> output = stackalloc int[4 * 4]; + Span<int> outptr = output; + Span<int> tempIn = stackalloc int[4]; + Span<int> tempOut = stackalloc int[4]; + + // Inverse transform row vectors + for (i = 0; i < 4; ++i) + { + Iht4[txType].Rows(input, outptr); + input = input.Slice(4); + outptr = outptr.Slice(4); + } + + // Inverse transform column vectors + for (i = 0; i < 4; ++i) + { + for (j = 0; j < 4; ++j) + { + tempIn[j] = output[j * 4 + i]; + } + + Iht4[txType].Cols(tempIn, tempOut); + for (j = 0; j < 4; ++j) + { + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 4)); + } + } + } + + private static readonly Transform2D[] Iht8 = new Transform2D[] + { + new Transform2D(Idct8, Idct8), // DCT_DCT = 0 + new Transform2D(Iadst8, Idct8), // ADST_DCT = 1 + new Transform2D(Idct8, Iadst8), // DCT_ADST = 2 + new Transform2D(Iadst8, Iadst8) // ADST_ADST = 3 + }; + + public static void Iht8x864Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType) + { + int i, j; + Span<int> output = stackalloc int[8 * 8]; + Span<int> outptr = output; + Span<int> tempIn = stackalloc int[8]; + Span<int> tempOut = stackalloc int[8]; + Transform2D ht = Iht8[txType]; + + // Inverse transform row vectors + for (i = 0; i < 8; ++i) + { + ht.Rows(input, outptr); + input = input.Slice(8); + outptr = outptr.Slice(8); + } + + // Inverse transform column vectors + for (i = 0; i < 8; ++i) + { + for (j = 0; j < 8; ++j) + { + tempIn[j] = output[j * 8 + i]; + } + + ht.Cols(tempIn, tempOut); + for (j = 0; j < 8; ++j) + { + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5)); + } + } + } + + private static readonly Transform2D[] Iht16 = new Transform2D[] + { + new Transform2D(Idct16, Idct16), // DCT_DCT = 0 + new Transform2D(Iadst16, Idct16), // ADST_DCT = 1 + new Transform2D(Idct16, Iadst16), // DCT_ADST = 2 + new Transform2D(Iadst16, Iadst16) // ADST_ADST = 3 + }; + + public static void Iht16x16256Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType) + { + int i, j; + Span<int> output = stackalloc int[16 * 16]; + Span<int> outptr = output; + Span<int> tempIn = stackalloc int[16]; + Span<int> tempOut = stackalloc int[16]; + Transform2D ht = Iht16[txType]; + + // Rows + for (i = 0; i < 16; ++i) + { + ht.Rows(input, outptr); + input = input.Slice(16); + outptr = outptr.Slice(16); + } + + // Columns + for (i = 0; i < 16; ++i) + { + for (j = 0; j < 16; ++j) + { + tempIn[j] = output[j * 16 + i]; + } + + ht.Cols(tempIn, tempOut); + for (j = 0; j < 16; ++j) + { + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6)); + } + } + } + + // Idct + public static void Idct4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob) + { + if (eob > 1) + { + Idct4x416Add(input, dest, stride); + } + else + { + Idct4x41Add(input, dest, stride); + } + } + + public static void Iwht4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob) + { + if (eob > 1) + { + Iwht4x416Add(input, dest, stride); + } + else + { + Iwht4x41Add(input, dest, stride); + } + } + + public static void Idct8x8Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob) + { + // If dc is 1, then input[0] is the reconstructed value, do not need + // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. + + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to decide what to do. + if (eob == 1) + { + // DC only DCT coefficient + Idct8x81Add(input, dest, stride); + } + else if (eob <= 12) + { + Idct8x812Add(input, dest, stride); + } + else + { + Idct8x864Add(input, dest, stride); + } + } + + public static void Idct16x16Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob) + { + /* The calculation can be simplified if there are not many non-zero dct + * coefficients. Use eobs to separate different cases. */ + if (eob == 1) /* DC only DCT coefficient. */ + { + Idct16x161Add(input, dest, stride); + } + else if (eob <= 10) + { + Idct16x1610Add(input, dest, stride); + } + else if (eob <= 38) + { + Idct16x1638Add(input, dest, stride); + } + else + { + Idct16x16256Add(input, dest, stride); + } + } + + public static void Idct32x32Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob) + { + if (eob == 1) + { + Idct32x321Add(input, dest, stride); + } + else if (eob <= 34) + { + // Non-zero coeff only in upper-left 8x8 + Idct32x3234Add(input, dest, stride); + } + else if (eob <= 135) + { + // Non-zero coeff only in upper-left 16x16 + Idct32x32135Add(input, dest, stride); + } + else + { + Idct32x321024Add(input, dest, stride); + } + } + + // Iht + public static void Iht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob) + { + if (txType == TxType.DctDct) + { + Idct4x4Add(input, dest, stride, eob); + } + else + { + Iht4x416Add(input, dest, stride, (int)txType); + } + } + + public static void Iht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob) + { + if (txType == TxType.DctDct) + { + Idct8x8Add(input, dest, stride, eob); + } + else + { + Iht8x864Add(input, dest, stride, (int)txType); + } + } + + public static void Iht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, + int stride, int eob) + { + if (txType == TxType.DctDct) + { + Idct16x16Add(input, dest, stride, eob); + } + else + { + Iht16x16256Add(input, dest, stride, (int)txType); + } + } + + private static readonly HighbdTransform2D[] HighbdIht4 = new HighbdTransform2D[] + { + new HighbdTransform2D(HighbdIdct4, HighbdIdct4), // DCT_DCT = 0 + new HighbdTransform2D(HighbdIadst4, HighbdIdct4), // ADST_DCT = 1 + new HighbdTransform2D(HighbdIdct4, HighbdIadst4), // DCT_ADST = 2 + new HighbdTransform2D(HighbdIadst4, HighbdIadst4) // ADST_ADST = 3 + }; + + public static void HighbdIht4x416Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd) + { + int i, j; + Span<int> output = stackalloc int[4 * 4]; + Span<int> outptr = output; + Span<int> tempIn = stackalloc int[4]; + Span<int> tempOut = stackalloc int[4]; + + // Inverse transform row vectors. + for (i = 0; i < 4; ++i) + { + HighbdIht4[txType].Rows(input, outptr, bd); + input = input.Slice(4); + outptr = outptr.Slice(4); + } + + // Inverse transform column vectors. + for (i = 0; i < 4; ++i) + { + for (j = 0; j < 4; ++j) + { + tempIn[j] = output[j * 4 + i]; + } + + HighbdIht4[txType].Cols(tempIn, tempOut, bd); + for (j = 0; j < 4; ++j) + { + dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 4), bd); + } + } + } + + private static readonly HighbdTransform2D[] HighIht8 = new HighbdTransform2D[] + { + new HighbdTransform2D(HighbdIdct8, HighbdIdct8), // DCT_DCT = 0 + new HighbdTransform2D(HighbdIadst8, HighbdIdct8), // ADST_DCT = 1 + new HighbdTransform2D(HighbdIdct8, HighbdIadst8), // DCT_ADST = 2 + new HighbdTransform2D(HighbdIadst8, HighbdIadst8) // ADST_ADST = 3 + }; + + public static void HighbdIht8x864Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd) + { + int i, j; + Span<int> output = stackalloc int[8 * 8]; + Span<int> outptr = output; + Span<int> tempIn = stackalloc int[8]; + Span<int> tempOut = stackalloc int[8]; + HighbdTransform2D ht = HighIht8[txType]; + + // Inverse transform row vectors. + for (i = 0; i < 8; ++i) + { + ht.Rows(input, outptr, bd); + input = input.Slice(8); + outptr = output.Slice(8); + } + + // Inverse transform column vectors. + for (i = 0; i < 8; ++i) + { + for (j = 0; j < 8; ++j) + { + tempIn[j] = output[j * 8 + i]; + } + + ht.Cols(tempIn, tempOut, bd); + for (j = 0; j < 8; ++j) + { + dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5), bd); + } + } + } + + private static readonly HighbdTransform2D[] HighIht16 = new HighbdTransform2D[] + { + new HighbdTransform2D(HighbdIdct16, HighbdIdct16), // DCT_DCT = 0 + new HighbdTransform2D(HighbdIadst16, HighbdIdct16), // ADST_DCT = 1 + new HighbdTransform2D(HighbdIdct16, HighbdIadst16), // DCT_ADST = 2 + new HighbdTransform2D(HighbdIadst16, HighbdIadst16) // ADST_ADST = 3 + }; + + public static void HighbdIht16x16256Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd) + { + int i, j; + Span<int> output = stackalloc int[16 * 16]; + Span<int> outptr = output; + Span<int> tempIn = stackalloc int[16]; + Span<int> tempOut = stackalloc int[16]; + HighbdTransform2D ht = HighIht16[txType]; + + // Rows + for (i = 0; i < 16; ++i) + { + ht.Rows(input, outptr, bd); + input = input.Slice(16); + outptr = output.Slice(16); + } + + // Columns + for (i = 0; i < 16; ++i) + { + for (j = 0; j < 16; ++j) + { + tempIn[j] = output[j * 16 + i]; + } + + ht.Cols(tempIn, tempOut, bd); + for (j = 0; j < 16; ++j) + { + dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd); + } + } + } + + // Idct + public static void HighbdIdct4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd) + { + if (eob > 1) + { + HighbdIdct4x416Add(input, dest, stride, bd); + } + else + { + HighbdIdct4x41Add(input, dest, stride, bd); + } + } + + public static void HighbdIwht4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd) + { + if (eob > 1) + { + HighbdIwht4x416Add(input, dest, stride, bd); + } + else + { + HighbdIwht4x41Add(input, dest, stride, bd); + } + } + + public static void HighbdIdct8x8Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd) + { + // If dc is 1, then input[0] is the reconstructed value, do not need + // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. + + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to decide what to do. + // DC only DCT coefficient + if (eob == 1) + { + vpx_Highbdidct8x8_1_add_c(input, dest, stride, bd); + } + else if (eob <= 12) + { + HighbdIdct8x812Add(input, dest, stride, bd); + } + else + { + HighbdIdct8x864Add(input, dest, stride, bd); + } + } + + public static void HighbdIdct16x16Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd) + { + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to separate different cases. + // DC only DCT coefficient. + if (eob == 1) + { + HighbdIdct16x161Add(input, dest, stride, bd); + } + else if (eob <= 10) + { + HighbdIdct16x1610Add(input, dest, stride, bd); + } + else if (eob <= 38) + { + HighbdIdct16x1638Add(input, dest, stride, bd); + } + else + { + HighbdIdct16x16256Add(input, dest, stride, bd); + } + } + + public static void HighbdIdct32x32Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd) + { + // Non-zero coeff only in upper-left 8x8 + if (eob == 1) + { + HighbdIdct32x321Add(input, dest, stride, bd); + } + else if (eob <= 34) + { + HighbdIdct32x3234Add(input, dest, stride, bd); + } + else if (eob <= 135) + { + HighbdIdct32x32135Add(input, dest, stride, bd); + } + else + { + HighbdIdct32x321024Add(input, dest, stride, bd); + } + } + + // Iht + public static void HighbdIht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd) + { + if (txType == TxType.DctDct) + { + HighbdIdct4x4Add(input, dest, stride, eob, bd); + } + else + { + HighbdIht4x416Add(input, dest, stride, (int)txType, bd); + } + } + + public static void HighbdIht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd) + { + if (txType == TxType.DctDct) + { + HighbdIdct8x8Add(input, dest, stride, eob, bd); + } + else + { + HighbdIht8x864Add(input, dest, stride, (int)txType, bd); + } + } + + public static void HighbdIht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd) + { + if (txType == TxType.DctDct) + { + HighbdIdct16x16Add(input, dest, stride, eob, bd); + } + else + { + HighbdIht16x16256Add(input, dest, stride, (int)txType, bd); + } + } + } +} |
