aboutsummaryrefslogtreecommitdiff
path: root/Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs
diff options
context:
space:
mode:
Diffstat (limited to 'Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs')
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs325
1 files changed, 325 insertions, 0 deletions
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs b/Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs
new file mode 100644
index 00000000..7ede6d34
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs
@@ -0,0 +1,325 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ internal static class Detokenize
+ {
+ private const int EobContextNode = 0; // Index into a 3-entry coefficient probability set: the "is there another token / end of block" decision.
+ private const int ZeroContextNode = 1; // Index into the same set: the "zero vs. non-zero coefficient" decision.
+ private const int OneContextNode = 2; // Index into the same set: the "magnitude one vs. larger token" decision.
+
+ /// <summary>
+ /// Derives the entropy context for scan position <paramref name="c"/> from the token
+ /// classes already cached for its two neighbouring coefficient positions.
+ /// </summary>
+ /// <param name="neighbors">Neighbour table holding two coefficient positions per scan position.</param>
+ /// <param name="tokenCache">Token class recorded so far for each coefficient position.</param>
+ /// <param name="c">Scan position whose context is requested.</param>
+ /// <returns>The sum of both neighbours' cached token classes plus one, shifted right by one bit.</returns>
+ private static int GetCoefContext(ReadOnlySpan<short> neighbors, ReadOnlySpan<byte> tokenCache, int c)
+ {
+     // Each scan position owns a pair of consecutive entries in the neighbour table.
+     int pairBase = 2 * c;
+     int firstToken = tokenCache[neighbors[pairBase]];
+     int secondToken = tokenCache[neighbors[pairBase + 1]];
+
+     return (firstToken + secondToken + 1) >> 1;
+ }
+
+ /// <summary>
+ /// Reads <paramref name="n"/> bits from the bool decoder, most significant bit first,
+ /// using one probability per bit, and assembles them into an integer.
+ /// </summary>
+ /// <param name="r">Bool decoder the bits are read from.</param>
+ /// <param name="probs">Probability for each bit; entry i is used for the i-th bit read.</param>
+ /// <param name="n">Number of bits to read.</param>
+ /// <param name="value">Caller-held copy of the decoder value, updated by every read.</param>
+ /// <param name="count">Caller-held copy of the decoder bit count, updated by every read.</param>
+ /// <param name="range">Caller-held copy of the decoder range, updated by every read.</param>
+ /// <returns>The assembled value; 0 when <paramref name="n"/> is not positive.</returns>
+ private static int ReadCoeff(
+     ref Reader r,
+     ReadOnlySpan<byte> probs,
+     int n,
+     ref ulong value,
+     ref int count,
+     ref uint range)
+ {
+     int accumulated = 0;
+     int bitIndex = 0;
+
+     while (bitIndex < n)
+     {
+         // Shift the bits read so far up and append the newly decoded bit.
+         accumulated = (accumulated << 1) | r.ReadBool(probs[bitIndex], ref value, ref count, ref range);
+         bitIndex++;
+     }
+
+     return accumulated;
+ }
+
+ /// <summary>
+ /// Decodes the coefficient tokens of a single transform block from the bool decoder and
+ /// stores the dequantized, range-checked values in <paramref name="dqcoeff"/>.
+ /// </summary>
+ /// <param name="xd">Block state supplying the coefficient probabilities, the optional counters, the bit depth and the current mode info.</param>
+ /// <param name="type">Plane type used to select probabilities and counters.</param>
+ /// <param name="dqcoeff">Destination for the dequantized coefficients, indexed by the position given by <paramref name="scan"/>.</param>
+ /// <param name="txSize">Transform size of the block; determines the maximum number of coefficients.</param>
+ /// <param name="dq">Dequantization factors: element 0 is used for scan position 0, element 1 for every later position.</param>
+ /// <param name="ctx">Entropy context for the first token.</param>
+ /// <param name="scan">Maps a scan index to a coefficient position.</param>
+ /// <param name="nb">Neighbour table handed to <see cref="GetCoefContext"/> to derive later contexts.</param>
+ /// <param name="r">Bool decoder; its Value, Range and Count are written back before returning.</param>
+ /// <returns>The scan index at which decoding stopped (the end-of-block position).</returns>
+ private static int DecodeCoefs(
+     ref MacroBlockD xd,
+     PlaneType type,
+     Span<int> dqcoeff,
+     TxSize txSize,
+     ref Array2<short> dq,
+     int ctx,
+     ReadOnlySpan<short> scan,
+     ReadOnlySpan<short> nb,
+     ref Reader r)
+ {
+     int txIndex = (int)txSize;
+     int planeIndex = (int)type;
+
+     // Counters are optional; the reference below is only touched when they are present.
+     bool updateCounts = !xd.Counts.IsNull;
+     ref Vp9BackwardUpdates stats = ref xd.Counts.Value;
+
+     int eobLimit = 16 << (txIndex << 1);
+     ref Vp9EntropyProbs entropy = ref xd.Fc.Value;
+     int isInter = xd.Mi[0].Value.IsInterBlock() ? 1 : 0;
+     ref Array6<Array6<Array3<byte>>> bandProbs = ref entropy.CoefProbs[txIndex][planeIndex][isInter];
+
+     Span<byte> seenTokens = stackalloc byte[32 * 32];
+     ReadOnlySpan<byte> bandLut = Luts.get_band_translate(txSize);
+     int bandCursor = 0;
+     int shift = (txSize == TxSize.Tx32x32) ? 1 : 0;
+     short quant = dq[0];
+
+     // The category 6 extra bits depend on the bit depth.
+     ReadOnlySpan<byte> cat6Probs;
+     int cat6Length;
+     if (xd.Bd == 12)
+     {
+         cat6Probs = Luts.Vp9Cat6ProbHigh12;
+         cat6Length = 18;
+     }
+     else if (xd.Bd == 10)
+     {
+         cat6Probs = new ReadOnlySpan<byte>(Luts.Vp9Cat6ProbHigh12).Slice(2);
+         cat6Length = 16;
+     }
+     else
+     {
+         cat6Probs = Luts.Vp9Cat6Prob;
+         cat6Length = 14;
+     }
+
+     // Work on local copies of the decoder state (the original notes the compiler produces
+     // better results this way); they are stored back into the reader on every exit path.
+     ulong bits = r.Value;
+     uint rng = r.Range;
+     int bitCount = r.Count;
+
+     int pos = 0;
+     while (pos < eobLimit)
+     {
+         int band = bandLut[bandCursor++];
+         ref Array3<byte> nodeProbs = ref bandProbs[band][ctx];
+         if (updateCounts)
+         {
+             ++stats.EobBranch[txIndex][planeIndex][isInter][band][ctx];
+         }
+
+         // A zero bit here signals the end of the block.
+         if (r.ReadBool(nodeProbs[EobContextNode], ref bits, ref bitCount, ref rng) == 0)
+         {
+             if (updateCounts)
+             {
+                 ++stats.Coef[txIndex][planeIndex][isInter][band][ctx][Constants.EobModelToken];
+             }
+
+             break;
+         }
+
+         // Skip over a run of zero coefficients.
+         while (r.ReadBool(nodeProbs[ZeroContextNode], ref bits, ref bitCount, ref rng) == 0)
+         {
+             if (updateCounts)
+             {
+                 ++stats.Coef[txIndex][planeIndex][isInter][band][ctx][Constants.ZeroToken];
+             }
+
+             quant = dq[1];
+             seenTokens[scan[pos]] = 0;
+             pos++;
+             if (pos >= eobLimit)
+             {
+                 // Zero tokens at the end (no eob token)
+                 r.Value = bits;
+                 r.Range = rng;
+                 r.Count = bitCount;
+                 return pos;
+             }
+
+             ctx = GetCoefContext(nb, seenTokens, pos);
+             band = bandLut[bandCursor++];
+             nodeProbs = ref bandProbs[band][ctx];
+         }
+
+         int coeff;
+         if (r.ReadBool(nodeProbs[OneContextNode], ref bits, ref bitCount, ref rng) == 0)
+         {
+             // Magnitude one.
+             if (updateCounts)
+             {
+                 ++stats.Coef[txIndex][planeIndex][isInter][band][ctx][Constants.OneToken];
+             }
+
+             seenTokens[scan[pos]] = 1;
+             coeff = quant >> shift;
+         }
+         else
+         {
+             // Magnitude two or more: the remaining tree uses the table row selected by the pivot probability.
+             ReadOnlySpan<byte> tail = Luts.Vp9Pareto8Full[nodeProbs[Constants.PivotNode] - 1];
+             if (updateCounts)
+             {
+                 ++stats.Coef[txIndex][planeIndex][isInter][band][ctx][Constants.TwoToken];
+             }
+
+             if (r.ReadBool(tail[0], ref bits, ref bitCount, ref rng) == 0)
+             {
+                 // Small literal magnitudes: 2, 3 or 4.
+                 if (r.ReadBool(tail[1], ref bits, ref bitCount, ref rng) == 0)
+                 {
+                     seenTokens[scan[pos]] = 2;
+                     coeff = (2 * quant) >> shift;
+                 }
+                 else
+                 {
+                     seenTokens[scan[pos]] = 3;
+                     coeff = ((3 + r.ReadBool(tail[2], ref bits, ref bitCount, ref rng)) * quant) >> shift;
+                 }
+             }
+             else
+             {
+                 // Category tokens: a base value plus a number of extra bits.
+                 int magnitude;
+                 if (r.ReadBool(tail[3], ref bits, ref bitCount, ref rng) == 0)
+                 {
+                     seenTokens[scan[pos]] = 4;
+                     if (r.ReadBool(tail[4], ref bits, ref bitCount, ref rng) == 0)
+                     {
+                         magnitude = Constants.Cat1MinVal + ReadCoeff(ref r, Luts.Vp9Cat1Prob, 1, ref bits, ref bitCount, ref rng);
+                     }
+                     else
+                     {
+                         magnitude = Constants.Cat2MinVal + ReadCoeff(ref r, Luts.Vp9Cat2Prob, 2, ref bits, ref bitCount, ref rng);
+                     }
+                 }
+                 else
+                 {
+                     seenTokens[scan[pos]] = 5;
+                     if (r.ReadBool(tail[5], ref bits, ref bitCount, ref rng) == 0)
+                     {
+                         if (r.ReadBool(tail[6], ref bits, ref bitCount, ref rng) == 0)
+                         {
+                             magnitude = Constants.Cat3MinVal + ReadCoeff(ref r, Luts.Vp9Cat3Prob, 3, ref bits, ref bitCount, ref rng);
+                         }
+                         else
+                         {
+                             magnitude = Constants.Cat4MinVal + ReadCoeff(ref r, Luts.Vp9Cat4Prob, 4, ref bits, ref bitCount, ref rng);
+                         }
+                     }
+                     else if (r.ReadBool(tail[7], ref bits, ref bitCount, ref rng) == 0)
+                     {
+                         magnitude = Constants.Cat5MinVal + ReadCoeff(ref r, Luts.Vp9Cat5Prob, 5, ref bits, ref bitCount, ref rng);
+                     }
+                     else
+                     {
+                         magnitude = Constants.Cat6MinVal + ReadCoeff(ref r, cat6Probs, cat6Length, ref bits, ref bitCount, ref rng);
+                     }
+                 }
+
+                 // The magnitude may use 18 bits, so multiply in 64-bit before shifting.
+                 coeff = (int)(((long)magnitude * quant) >> shift);
+             }
+         }
+
+         // Resolve the destination first, then read the sign bit (probability 128) and store.
+         ref int target = ref dqcoeff[scan[pos]];
+         int signBit = r.ReadBool(128, ref bits, ref bitCount, ref rng);
+         target = (int)HighbdCheckRange(signBit != 0 ? -coeff : coeff, xd.Bd);
+
+         pos++;
+         ctx = GetCoefContext(nb, seenTokens, pos);
+         quant = dq[1];
+     }
+
+     r.Value = bits;
+     r.Range = rng;
+     r.Count = bitCount;
+     return pos;
+ }
+
+ /// <summary>
+ /// Computes, for a transform that reaches past the block limits stored in
+ /// <paramref name="xd"/>, a shift of 8 bits per block lying beyond the limit.
+ /// The outputs are left untouched when the limit is zero or the transform fits.
+ /// </summary>
+ /// <param name="xd">Block state providing MaxBlocksWide and MaxBlocksHigh.</param>
+ /// <param name="ctxShiftA">Receives the shift for the above context when the transform exceeds the width limit.</param>
+ /// <param name="ctxShiftL">Receives the shift for the left context when the transform exceeds the height limit.</param>
+ /// <param name="x">Horizontal block offset of the transform.</param>
+ /// <param name="y">Vertical block offset of the transform.</param>
+ /// <param name="txSizeInBlocks">Size of the transform, measured in blocks.</param>
+ private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y, uint txSizeInBlocks)
+ {
+     var widthLimit = xd.MaxBlocksWide;
+     if (widthLimit != 0 && txSizeInBlocks + x > widthLimit)
+     {
+         ctxShiftA = (int)(txSizeInBlocks - (widthLimit - x)) * 8;
+     }
+
+     var heightLimit = xd.MaxBlocksHigh;
+     if (heightLimit != 0 && txSizeInBlocks + y > heightLimit)
+     {
+         ctxShiftL = (int)(txSizeInBlocks - (heightLimit - y)) * 8;
+     }
+ }
+
+ /// <summary>
+ /// Maps a plane index to a plane type: plane 0 maps to value 0, any higher plane to value 1.
+ /// </summary>
+ /// <param name="plane">Plane index.</param>
+ /// <returns>The plane type with numeric value 0 or 1.</returns>
+ private static PlaneType GetPlaneType(int plane)
+ {
+     int typeValue = 0;
+     if (plane > 0)
+     {
+         typeValue = 1;
+     }
+
+     return (PlaneType)typeValue;
+ }
+
+ /// <summary>
+ /// Decodes the coefficient tokens of one transform block of a plane and updates the above
+ /// and left entropy context entries covered by that block.
+ /// </summary>
+ /// <param name="twd">Tile worker state providing the bit reader and the block state.</param>
+ /// <param name="plane">Index of the plane being decoded.</param>
+ /// <param name="sc">Scan order whose Scan and Neighbors tables drive the coefficient decode.</param>
+ /// <param name="x">Offset of the block into the plane's above context.</param>
+ /// <param name="y">Offset of the block into the plane's left context.</param>
+ /// <param name="txSize">Transform size of the block.</param>
+ /// <param name="segId">Segment index selecting the dequantization factors.</param>
+ /// <returns>The end-of-block position reported by <see cref="DecodeCoefs"/>, or 0 for an unrecognized transform size.</returns>
+ public static int DecodeBlockTokens(
+     ref TileWorkerData twd,
+     int plane,
+     Luts.ScanOrder sc,
+     int x,
+     int y,
+     TxSize txSize,
+     int segId)
+ {
+     ref Reader reader = ref twd.BitReader;
+     ref MacroBlockD xd = ref twd.Xd;
+     ref MacroBlockDPlane pd = ref xd.Plane[plane];
+     ref Array2<short> dequant = ref pd.SegDequant[segId];
+     Span<sbyte> above = pd.AboveContext.ToSpan().Slice(x);
+     Span<sbyte> left = pd.LeftContext.ToSpan().Slice(y);
+     PlaneType planeType = GetPlaneType(plane);
+     int shiftAbove = 0;
+     int shiftLeft = 0;
+     int eob;
+
+     // Each transform size covers 1, 2, 4 or 8 context bytes per direction. The covered bytes
+     // are read and written in one access by reinterpreting the context span as a wider integer.
+     switch (txSize)
+     {
+         case TxSize.Tx4x4:
+         {
+             int ctx = (above[0] != 0 ? 1 : 0) + (left[0] != 0 ? 1 : 0);
+             eob = DecodeCoefs(
+                 ref xd,
+                 planeType,
+                 pd.DqCoeff.ToSpan(),
+                 txSize,
+                 ref dequant,
+                 ctx,
+                 sc.Scan,
+                 sc.Neighbors,
+                 ref reader);
+
+             sbyte mark = (sbyte)(eob > 0 ? 1 : 0);
+             left[0] = mark;
+             above[0] = mark;
+             break;
+         }
+         case TxSize.Tx8x8:
+         {
+             GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx8x8);
+             Span<ushort> above16 = MemoryMarshal.Cast<sbyte, ushort>(above);
+             Span<ushort> left16 = MemoryMarshal.Cast<sbyte, ushort>(left);
+             int ctx = (above16[0] != 0 ? 1 : 0) + (left16[0] != 0 ? 1 : 0);
+             eob = DecodeCoefs(
+                 ref xd,
+                 planeType,
+                 pd.DqCoeff.ToSpan(),
+                 txSize,
+                 ref dequant,
+                 ctx,
+                 sc.Scan,
+                 sc.Neighbors,
+                 ref reader);
+
+             int fill = eob > 0 ? 0x0101 : 0;
+             above16[0] = (ushort)(fill >> shiftAbove);
+             left16[0] = (ushort)(fill >> shiftLeft);
+             break;
+         }
+         case TxSize.Tx16x16:
+         {
+             GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx16x16);
+             Span<uint> above32 = MemoryMarshal.Cast<sbyte, uint>(above);
+             Span<uint> left32 = MemoryMarshal.Cast<sbyte, uint>(left);
+             int ctx = (above32[0] != 0 ? 1 : 0) + (left32[0] != 0 ? 1 : 0);
+             eob = DecodeCoefs(
+                 ref xd,
+                 planeType,
+                 pd.DqCoeff.ToSpan(),
+                 txSize,
+                 ref dequant,
+                 ctx,
+                 sc.Scan,
+                 sc.Neighbors,
+                 ref reader);
+
+             int fill = eob > 0 ? 0x01010101 : 0;
+             above32[0] = (uint)(fill >> shiftAbove);
+             left32[0] = (uint)(fill >> shiftLeft);
+             break;
+         }
+         case TxSize.Tx32x32:
+         {
+             GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx32x32);
+             // NOTE: Reinterpreting as ulong is safe here because the default memory alignment
+             // is at least 8 bytes and Tx32x32 blocks are aligned on 8 byte boundaries.
+             Span<ulong> above64 = MemoryMarshal.Cast<sbyte, ulong>(above);
+             Span<ulong> left64 = MemoryMarshal.Cast<sbyte, ulong>(left);
+             int ctx = (above64[0] != 0 ? 1 : 0) + (left64[0] != 0 ? 1 : 0);
+             eob = DecodeCoefs(
+                 ref xd,
+                 planeType,
+                 pd.DqCoeff.ToSpan(),
+                 txSize,
+                 ref dequant,
+                 ctx,
+                 sc.Scan,
+                 sc.Neighbors,
+                 ref reader);
+
+             ulong fill = eob > 0 ? 0x0101010101010101UL : 0;
+             above64[0] = fill >> shiftAbove;
+             left64[0] = fill >> shiftLeft;
+             break;
+         }
+         default:
+             Debug.Assert(false, "Invalid transform size.");
+             eob = 0;
+             break;
+     }
+
+     return eob;
+ }
+ }
+}