diff options
| author | gdkchan <gab.dark.100@gmail.com> | 2020-07-12 00:07:01 -0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-07-12 05:07:01 +0200 |
| commit | 4d02a2d2c0451b4de1f6de3bbce54c457cacebe2 (patch) | |
| tree | 120fe4fb8cfa1ac1c6ef4c97d92be47e955e8c0f /Ryujinx.Graphics.Nvdec.Vp9/Types | |
| parent | 38b26cf4242999fa7d8c550993ac0940cd03d55f (diff) | |
New NVDEC and VIC implementation (#1384)
* Initial NVDEC and VIC implementation
* Update FFmpeg.AutoGen to 4.3.0
* Add nvdec dependencies for Windows
* Unify some VP9 structures
* Rename VP9 structure fields
* Improvements to Video API
* XML docs for Common.Memory
* Remove now unused or redundant overloads from MemoryAccessor
* NVDEC UV surface read/write scalar paths
* Add FIXME comments about hacky things/stuff that will need to be fixed in the future
* Cleaned up VP9 memory allocation
* Remove some debug logs
* Rename some VP9 structs
* Remove unused struct
* No need to compile Ryujinx.Graphics.Host1x with unsafe anymore
* Name AsyncWorkQueue threads to make debugging easier
* Make Vp9PictureInfo a ref struct
* LayoutConverter no longer needs the depth argument (broken by rebase)
* Pooling of VP9 buffers, plus fix a memory leak on VP9
* Really wish VS could rename projects properly...
* Address feedback
* Remove using
* Catch OperationCanceledException
* Add licensing information
* Add THIRDPARTY.md to release too
Co-authored-by: Thog <me@thog.eu>
Diffstat (limited to 'Ryujinx.Graphics.Nvdec.Vp9/Types')
32 files changed, 1777 insertions, 0 deletions
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs new file mode 100644 index 00000000..9e1cd8b4 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs @@ -0,0 +1,10 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct BModeInfo + { + public PredictionMode Mode; + public Array2<Mv> Mv; // First, second inter predictor motion vectors + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs new file mode 100644 index 00000000..22a48e20 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs @@ -0,0 +1,21 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum BlockSize + { + Block4x4 = 0, + Block4x8 = 1, + Block8x4 = 2, + Block8x8 = 3, + Block8x16 = 4, + Block16x8 = 5, + Block16x16 = 6, + Block16x32 = 7, + Block32x16 = 8, + Block32x32 = 9, + Block32x64 = 10, + Block64x32 = 11, + Block64x64 = 12, + BlockSizes = 13, + BlockInvalid = BlockSizes + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs new file mode 100644 index 00000000..180d5e34 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs @@ -0,0 +1,10 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct Buf2D + { + public ArrayPtr<byte> Buf; + public int Stride; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs new file mode 100644 index 00000000..a783999e --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum FrameType + { + KeyFrame = 0, + InterFrame = 1 + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs new file mode 100644 index 00000000..8dc33bda --- /dev/null +++ 
b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs @@ -0,0 +1,27 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct LoopFilter + { + public int FilterLevel; + public int LastFiltLevel; + + public int SharpnessLevel; + public int LastSharpnessLevel; + + public bool ModeRefDeltaEnabled; + public bool ModeRefDeltaUpdate; + + // 0 = Intra, Last, GF, ARF + public Array4<sbyte> RefDeltas; + public Array4<sbyte> LastRefDeltas; + + // 0 = ZERO_MV, MV + public Array2<sbyte> ModeDeltas; + public Array2<sbyte> LastModeDeltas; + + public ArrayPtr<LoopFilterMask> Lfm; + public int LfmStride; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs new file mode 100644 index 00000000..0ac38a7b --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs @@ -0,0 +1,10 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct LoopFilterInfoN + { + public Array64<LoopFilterThresh> Lfthr; + public Array8<Array4<Array2<byte>>> Lvl; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs new file mode 100644 index 00000000..4aff843a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs @@ -0,0 +1,24 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + // This structure holds bit masks for all 8x8 blocks in a 64x64 region. + // Each 1 bit represents a position in which we want to apply the loop filter. + // Left_ entries refer to whether we apply a filter on the border to the + // left of the block. Above_ entries refer to whether or not to apply a + // filter on the above border. Int_ entries refer to whether or not to + // apply borders on the 4x4 edges within the 8x8 block that each bit + // represents. 
+ // Since each transform is accompanied by a potentially different type of + // loop filter there is a different entry in the array for each transform size. + internal struct LoopFilterMask + { + public Array4<ulong> LeftY; + public Array4<ulong> AboveY; + public ulong Int4x4Y; + public Array4<ushort> LeftUv; + public Array4<ushort> AboveUv; + public ushort Int4x4Uv; + public Array64<byte> LflY; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs new file mode 100644 index 00000000..bea1d115 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs @@ -0,0 +1,13 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + // Need to align this structure so when it is declared and + // passed it can be loaded into vector registers. + internal struct LoopFilterThresh + { + public Array16<byte> Mblim; + public Array16<byte> Lim; + public Array16<byte> HevThr; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs new file mode 100644 index 00000000..f1111528 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs @@ -0,0 +1,179 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct MacroBlockD + { + public Array3<MacroBlockDPlane> Plane; + public byte BmodeBlocksWl; + public byte BmodeBlocksHl; + + public Ptr<Vp9BackwardUpdates> Counts; + public TileInfo Tile; + + public int MiStride; + + // Grid of 8x8 cells is placed over the block. 
+ // If some of them belong to the same mbtree-block + // they will just have same mi[i][j] value + public ArrayPtr<Ptr<ModeInfo>> Mi; + public Ptr<ModeInfo> LeftMi; + public Ptr<ModeInfo> AboveMi; + + public uint MaxBlocksWide; + public uint MaxBlocksHigh; + + public ArrayPtr<Array3<byte>> PartitionProbs; + + /* Distance of MB away from frame edges */ + public int MbToLeftEdge; + public int MbToRightEdge; + public int MbToTopEdge; + public int MbToBottomEdge; + + public Ptr<Vp9EntropyProbs> Fc; + + /* pointers to reference frames */ + public Array2<Ptr<RefBuffer>> BlockRefs; + + /* pointer to current frame */ + public Surface CurBuf; + + public Array3<ArrayPtr<sbyte>> AboveContext; + public Array3<Array16<sbyte>> LeftContext; + + public ArrayPtr<sbyte> AboveSegContext; + public Array8<sbyte> LeftSegContext; + + /* Bit depth: 8, 10, 12 */ + public int Bd; + + public bool Lossless; + public bool Corrupted; + + public Ptr<InternalErrorInfo> ErrorInfo; + + public int GetPredContextSegId() + { + sbyte aboveSip = !AboveMi.IsNull ? AboveMi.Value.SegIdPredicted : (sbyte)0; + sbyte leftSip = !LeftMi.IsNull ? LeftMi.Value.SegIdPredicted : (sbyte)0; + + return aboveSip + leftSip; + } + + public int GetSkipContext() + { + int aboveSkip = !AboveMi.IsNull ? AboveMi.Value.Skip : 0; + int leftSkip = !LeftMi.IsNull ? LeftMi.Value.Skip : 0; + return aboveSkip + leftSkip; + } + + public int GetPredContextSwitchableInterp() + { + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + int leftType = !LeftMi.IsNull ? LeftMi.Value.InterpFilter : Constants.SwitchableFilters; + int aboveType = !AboveMi.IsNull ? 
AboveMi.Value.InterpFilter : Constants.SwitchableFilters; + + if (leftType == aboveType) + { + return leftType; + } + else if (leftType == Constants.SwitchableFilters) + { + return aboveType; + } + else if (aboveType == Constants.SwitchableFilters) + { + return leftType; + } + else + { + return Constants.SwitchableFilters; + } + } + + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + // 0 - inter/inter, inter/--, --/inter, --/-- + // 1 - intra/inter, inter/intra + // 2 - intra/--, --/intra + // 3 - intra/intra + public int GetIntraInterContext() + { + if (!AboveMi.IsNull && !LeftMi.IsNull) + { // Both edges available + bool aboveIntra = !AboveMi.Value.IsInterBlock(); + bool leftIntra = !LeftMi.Value.IsInterBlock(); + return leftIntra && aboveIntra ? 3 : (leftIntra || aboveIntra ? 1 : 0); + } + else if (!AboveMi.IsNull || !LeftMi.IsNull) + { // One edge available + return 2 * (!(!AboveMi.IsNull ? AboveMi.Value : LeftMi.Value).IsInterBlock() ? 1 : 0); + } + return 0; + } + + // Returns a context number for the given MB prediction signal + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real blocks. + // The prediction flags in these dummy entries are initialized to 0. + public int GetTxSizeContext() + { + int maxTxSize = (int)Luts.MaxTxSizeLookup[(int)Mi[0].Value.SbType]; + int aboveCtx = (!AboveMi.IsNull && AboveMi.Value.Skip == 0) ? (int)AboveMi.Value.TxSize : maxTxSize; + int leftCtx = (!LeftMi.IsNull && LeftMi.Value.Skip == 0) ? (int)LeftMi.Value.TxSize : maxTxSize; + if (LeftMi.IsNull) + { + leftCtx = aboveCtx; + } + + if (AboveMi.IsNull) + { + aboveCtx = leftCtx; + } + + return (aboveCtx + leftCtx) > maxTxSize ? 
1 : 0; + } + + public void SetupBlockPlanes(int ssX, int ssY) + { + int i; + + for (i = 0; i < Constants.MaxMbPlane; i++) + { + Plane[i].SubsamplingX = i != 0 ? ssX : 0; + Plane[i].SubsamplingY = i != 0 ? ssY : 0; + } + } + + public void SetSkipContext(int miRow, int miCol) + { + int aboveIdx = miCol * 2; + int leftIdx = (miRow * 2) & 15; + int i; + for (i = 0; i < Constants.MaxMbPlane; ++i) + { + ref MacroBlockDPlane pd = ref Plane[i]; + pd.AboveContext = AboveContext[i].Slice(aboveIdx >> pd.SubsamplingX); + pd.LeftContext = new ArrayPtr<sbyte>(ref LeftContext[i][leftIdx >> pd.SubsamplingY], 16 - (leftIdx >> pd.SubsamplingY)); + } + } + + internal void SetMiRowCol(ref TileInfo tile, int miRow, int bh, int miCol, int bw, int miRows, int miCols) + { + MbToTopEdge = -((miRow * Constants.MiSize) * 8); + MbToBottomEdge = ((miRows - bh - miRow) * Constants.MiSize) * 8; + MbToLeftEdge = -((miCol * Constants.MiSize) * 8); + MbToRightEdge = ((miCols - bw - miCol) * Constants.MiSize) * 8; + + // Are edges available for intra prediction? + AboveMi = (miRow != 0) ? Mi[-MiStride] : Ptr<ModeInfo>.Null; + LeftMi = (miCol > tile.MiColStart) ? 
Mi[-1] : Ptr<ModeInfo>.Null; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs new file mode 100644 index 00000000..ae4ec6f4 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs @@ -0,0 +1,21 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct MacroBlockDPlane + { + public ArrayPtr<int> DqCoeff; + public int SubsamplingX; + public int SubsamplingY; + public Buf2D Dst; + public Array2<Buf2D> Pre; + public ArrayPtr<sbyte> AboveContext; + public ArrayPtr<sbyte> LeftContext; + public Array8<Array2<short>> SegDequant; + + // Number of 4x4s in current block + public ushort N4W, N4H; + // Log2 of N4W, N4H + public byte N4Wl, N4Hl; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs new file mode 100644 index 00000000..8ef281d8 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs @@ -0,0 +1,66 @@ +using Ryujinx.Common.Memory; +using System.Diagnostics; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct ModeInfo + { + // Common for both Inter and Intra blocks + public BlockSize SbType; + public PredictionMode Mode; + public TxSize TxSize; + public sbyte Skip; + public sbyte SegmentId; + public sbyte SegIdPredicted; // Valid only when TemporalUpdate is enabled + + // Only for Intra blocks + public PredictionMode UvMode; + + // Only for Inter blocks + public byte InterpFilter; + + // if ref_frame[idx] is equal to AltRefFrame then + // MacroBlockD.BlockRef[idx] is an altref + public Array2<sbyte> RefFrame; + + public Array2<Mv> Mv; + + public Array4<BModeInfo> Bmi; + + public PredictionMode GetYMode(int block) + { + return SbType < BlockSize.Block8x8 ? 
Bmi[block].Mode : Mode; + } + + public TxSize GetUvTxSize(ref MacroBlockDPlane pd) + { + Debug.Assert(SbType < BlockSize.Block8x8 || + Luts.SsSizeLookup[(int)SbType][pd.SubsamplingX][pd.SubsamplingY] != BlockSize.BlockInvalid); + return Luts.UvTxsizeLookup[(int)SbType][(int)TxSize][pd.SubsamplingX][pd.SubsamplingY]; + } + + public bool IsInterBlock() + { + return RefFrame[0] > Constants.IntraFrame; + } + + public bool HasSecondRef() + { + return RefFrame[1] > Constants.IntraFrame; + } + + private static readonly int[][] IdxNColumnToSubblock = new int[][] + { + new int[] { 1, 2 }, new int[] { 1, 3 }, new int[] { 3, 2 }, new int[] { 3, 3 } + }; + + // This function returns either the appropriate sub block or block's mv + // on whether the block_size < 8x8 and we have check_sub_blocks set. + public Mv GetSubBlockMv(int whichMv, int searchCol, int blockIdx) + { + return blockIdx >= 0 && SbType < BlockSize.Block8x8 + ? Bmi[IdxNColumnToSubblock[blockIdx][searchCol == 0 ? 1 : 0]].Mv[whichMv] + : Mv[whichMv]; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs new file mode 100644 index 00000000..319c8dba --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs @@ -0,0 +1,14 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum MotionVectorContext + { + BothZero = 0, + ZeroPlusPredicted = 1, + BothPredicted = 2, + NewPlusNonIntra = 3, + BothNew = 4, + IntraPlusNonIntra = 5, + BothIntra = 6, + InvalidCase = 9 + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs new file mode 100644 index 00000000..c1f99ade --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs @@ -0,0 +1,189 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; +using System; +using System.Diagnostics; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct Mv + { + public short Row; + public short Col; + + private static 
readonly byte[] LogInBase2 = new byte[] + { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 + }; + + public bool UseMvHp() + { + const int kMvRefThresh = 64; // Threshold for use of high-precision 1/8 mv + return Math.Abs(Row) < kMvRefThresh && Math.Abs(Col) < kMvRefThresh; + } + + public static bool MvJointVertical(MvJointType type) + { + return type == MvJointType.MvJointHzvnz || type == MvJointType.MvJointHnzvnz; + } + + public static bool MvJointHorizontal(MvJointType type) + { + return type == MvJointType.MvJointHnzvz || type == MvJointType.MvJointHnzvnz; + } + + private static int MvClassBase(MvClassType c) + { + return c != 0 ? Constants.Class0Size << ((int)c + 2) : 0; + } + + private static MvClassType GetMvClass(int z, Ptr<int> offset) + { + MvClassType c = (z >= Constants.Class0Size * 4096) ? 
MvClassType.MvClass10 : (MvClassType)LogInBase2[z >> 3]; + if (!offset.IsNull) + { + offset.Value = z - MvClassBase(c); + } + + return c; + } + + private static void IncMvComponent(int v, ref Vp9BackwardUpdates counts, int comp, int incr, int usehp) + { + int s, z, c, o = 0, d, e, f; + Debug.Assert(v != 0); /* Should not be zero */ + s = v < 0 ? 1 : 0; + counts.Sign[comp][s] += (uint)incr; + z = (s != 0 ? -v : v) - 1; /* Magnitude - 1 */ + + c = (int)GetMvClass(z, new Ptr<int>(ref o)); + counts.Classes[comp][c] += (uint)incr; + + d = (o >> 3); /* Int mv data */ + f = (o >> 1) & 3; /* Fractional pel mv data */ + e = (o & 1); /* High precision mv data */ + + if (c == (int)MvClassType.MvClass0) + { + counts.Class0[comp][d] += (uint)incr; + counts.Class0Fp[comp][d][f] += (uint)incr; + counts.Class0Hp[comp][e] += (uint)(usehp * incr); + } + else + { + int i; + int b = c + Constants.Class0Bits - 1; // Number of bits + for (i = 0; i < b; ++i) + { + counts.Bits[comp][i][((d >> i) & 1)] += (uint)incr; + } + + counts.Fp[comp][f] += (uint)incr; + counts.Hp[comp][e] += (uint)(usehp * incr); + } + } + + private MvJointType GetMvJoint() + { + if (Row == 0) + { + return Col == 0 ? MvJointType.MvJointZero : MvJointType.MvJointHnzvz; + } + else + { + return Col == 0 ? 
MvJointType.MvJointHzvnz : MvJointType.MvJointHnzvnz; + } + } + + internal void IncMv(Ptr<Vp9BackwardUpdates> counts) + { + if (!counts.IsNull) + { + MvJointType j = GetMvJoint(); + ++counts.Value.Joints[(int)j]; + + if (MvJointVertical(j)) + { + IncMvComponent(Row, ref counts.Value, 0, 1, 1); + } + + if (MvJointHorizontal(j)) + { + IncMvComponent(Col, ref counts.Value, 1, 1, 1); + } + } + } + + public void ClampMv(int minCol, int maxCol, int minRow, int maxRow) + { + Col = (short)Math.Clamp(Col, minCol, maxCol); + Row = (short)Math.Clamp(Row, minRow, maxRow); + } + + private const int MvBorder = (16 << 3); // Allow 16 pels in 1/8th pel units + + public void ClampMvRef(ref MacroBlockD xd) + { + ClampMv( + xd.MbToLeftEdge - MvBorder, + xd.MbToRightEdge + MvBorder, + xd.MbToTopEdge - MvBorder, + xd.MbToBottomEdge + MvBorder); + } + + public void LowerMvPrecision(bool allowHP) + { + bool useHP = allowHP && UseMvHp(); + if (!useHP) + { + if ((Row & 1) != 0) + { + Row += (short)(Row > 0 ? -1 : 1); + } + + if ((Col & 1) != 0) + { + Col += (short)(Col > 0 ? 
-1 : 1); + } + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs new file mode 100644 index 00000000..fb25d18e --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct Mv32 + { + public int Row; + public int Col; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs new file mode 100644 index 00000000..68a0b59a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs @@ -0,0 +1,17 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum MvClassType + { + MvClass0 = 0, /* (0, 2] integer pel */ + MvClass1 = 1, /* (2, 4] integer pel */ + MvClass2 = 2, /* (4, 8] integer pel */ + MvClass3 = 3, /* (8, 16] integer pel */ + MvClass4 = 4, /* (16, 32] integer pel */ + MvClass5 = 5, /* (32, 64] integer pel */ + MvClass6 = 6, /* (64, 128] integer pel */ + MvClass7 = 7, /* (128, 256] integer pel */ + MvClass8 = 8, /* (256, 512] integer pel */ + MvClass9 = 9, /* (512, 1024] integer pel */ + MvClass10 = 10, /* (1024,2048] integer pel */ + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs new file mode 100644 index 00000000..a20cb6d0 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum MvJointType + { + MvJointZero = 0, /* Zero vector */ + MvJointHnzvz = 1, /* Vert zero, hor nonzero */ + MvJointHzvnz = 2, /* Hor zero, vert nonzero */ + MvJointHnzvnz = 3, /* Both components nonzero */ + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs new file mode 100644 index 00000000..71949a09 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs @@ -0,0 +1,10 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal 
struct MvRef + { + public Array2<Mv> Mv; + public Array2<sbyte> RefFrame; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs new file mode 100644 index 00000000..096f9818 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs @@ -0,0 +1,12 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum PartitionType + { + PartitionNone, + PartitionHorz, + PartitionVert, + PartitionSplit, + PartitionTypes, + PartitionInvalid = PartitionTypes + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs new file mode 100644 index 00000000..790aa2a0 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs @@ -0,0 +1,9 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum PlaneType + { + Y = 0, + Uv = 1, + PlaneTypes + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs new file mode 100644 index 00000000..0d3b56f6 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs @@ -0,0 +1,14 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct Position + { + public int Row; + public int Col; + + public Position(int row, int col) + { + Row = row; + Col = col; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs new file mode 100644 index 00000000..bbb9be9a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs @@ -0,0 +1,21 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum PredictionMode + { + DcPred = 0, // Average of above and left pixels + VPred = 1, // Vertical + HPred = 2, // Horizontal + D45Pred = 3, // Directional 45 deg = round(arctan(1 / 1) * 180 / pi) + D135Pred = 4, // Directional 135 deg = 180 - 45 + D117Pred = 5, // Directional 117 deg = 180 - 63 + D153Pred = 6, // Directional 153 deg = 180 - 27 + D207Pred = 7, // 
Directional 207 deg = 180 + 27 + D63Pred = 8, // Directional 63 deg = round(arctan(2 / 1) * 180 / pi) + TmPred = 9, // True-motion + NearestMv = 10, + NearMv = 11, + ZeroMv = 12, + NewMv = 13, + MbModeCount = 14 + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs new file mode 100644 index 00000000..9942dd05 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct RefBuffer + { + public Surface Buf; + public ScaleFactors Sf; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs new file mode 100644 index 00000000..7cbf9f4e --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum ReferenceMode + { + SingleReference = 0, + CompoundReference = 1, + ReferenceModeSelect = 2, + ReferenceModes = 3 + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs new file mode 100644 index 00000000..970f9680 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs @@ -0,0 +1,451 @@ +using Ryujinx.Common.Memory; +using System.Runtime.CompilerServices; +using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Convolve; +using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct ScaleFactors + { + private const int RefScaleShift = 14; + private const int RefNoScale = (1 << RefScaleShift); + private const int RefInvalidScale = -1; + + private unsafe delegate void ConvolveFn( + byte* src, + int srcStride, + byte* dst, + int dstStride, + Array8<short>[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h); + + private unsafe delegate void HighbdConvolveFn( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + 
Array8<short>[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd); + + private static readonly unsafe ConvolveFn[][][] PredictX16Y16 = new ConvolveFn[][][] + { + new ConvolveFn[][] + { + new ConvolveFn[] + { + ConvolveCopy, + ConvolveAvg + }, + new ConvolveFn[] + { + Convolve8Vert, + Convolve8AvgVert + } + }, + new ConvolveFn[][] + { + new ConvolveFn[] + { + Convolve8Horiz, + Convolve8AvgHoriz + }, + new ConvolveFn[] + { + Convolve8, + Convolve8Avg + } + } + }; + + private static readonly unsafe ConvolveFn[][][] PredictX16 = new ConvolveFn[][][] + { + new ConvolveFn[][] + { + new ConvolveFn[] + { + ScaledVert, + ScaledAvgVert + }, + new ConvolveFn[] + { + ScaledVert, + ScaledAvgVert + } + }, + new ConvolveFn[][] + { + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + }, + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + } + } + }; + + private static readonly unsafe ConvolveFn[][][] PredictY16 = new ConvolveFn[][][] + { + new ConvolveFn[][] + { + new ConvolveFn[] + { + ScaledHoriz, + ScaledAvgHoriz + }, + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + } + }, + new ConvolveFn[][] + { + new ConvolveFn[] + { + ScaledHoriz, + ScaledAvgHoriz + }, + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + } + } + }; + + private static readonly unsafe ConvolveFn[][][] Predict = new ConvolveFn[][][] + { + new ConvolveFn[][] + { + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + }, + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + } + }, + new ConvolveFn[][] + { + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + }, + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + } + } + }; + + private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictX16Y16 = new HighbdConvolveFn[][][] + { + new HighbdConvolveFn[][] + { + new HighbdConvolveFn[] + { + HighbdConvolveCopy, + HighbdConvolveAvg + }, + new HighbdConvolveFn[] + { + HighbdConvolve8Vert, + HighbdConvolve8AvgVert + } + }, + new HighbdConvolveFn[][] + { + new HighbdConvolveFn[] + { + 
HighbdConvolve8Horiz,
                    HighbdConvolve8AvgHoriz
                },
                new HighbdConvolveFn[]
                {
                    HighbdConvolve8,
                    HighbdConvolve8Avg
                }
            }
        };

        // High bit depth kernels selected by HighbdInterPredict when XStepQ4 == 16 but
        // YStepQ4 != 16. Indexed as [horiz][vert][avg].
        private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictX16 = new HighbdConvolveFn[][][]
        {
            new HighbdConvolveFn[][]
            {
                new HighbdConvolveFn[] { HighbdConvolve8Vert, HighbdConvolve8AvgVert },
                new HighbdConvolveFn[] { HighbdConvolve8Vert, HighbdConvolve8AvgVert }
            },
            new HighbdConvolveFn[][]
            {
                new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
                new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
            }
        };

        // High bit depth kernels selected by HighbdInterPredict when YStepQ4 == 16 but
        // XStepQ4 != 16. Indexed as [horiz][vert][avg].
        private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictY16 = new HighbdConvolveFn[][][]
        {
            new HighbdConvolveFn[][]
            {
                new HighbdConvolveFn[] { HighbdConvolve8Horiz, HighbdConvolve8AvgHoriz },
                new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
            },
            new HighbdConvolveFn[][]
            {
                new HighbdConvolveFn[] { HighbdConvolve8Horiz, HighbdConvolve8AvgHoriz },
                new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
            }
        };

        // High bit depth kernels selected by HighbdInterPredict when neither step equals 16.
        // Indexed as [horiz][vert][avg]; every entry is the full 2D convolve (plain or averaging).
        private static readonly unsafe HighbdConvolveFn[][][] HighbdPredict = new HighbdConvolveFn[][][]
        {
            new HighbdConvolveFn[][]
            {
                new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
                new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
            },
            new HighbdConvolveFn[][]
            {
                new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
                new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
            }
        };

        public int XScaleFP; // Horizontal fixed point scale factor
        public int YScaleFP; // Vertical fixed point scale factor
        public int XStepQ4;  // Set by SetupScaleFactorsForFrame to ScaledX(16)
        public int YStepQ4;  // Set by SetupScaleFactorsForFrame to ScaledY(16)

        // Returns val run through the horizontal scale factor when IsScaled() is true,
        // otherwise returns val unchanged.
        public int ScaleValueX(int val)
        {
            if (IsScaled())
            {
                return ScaledX(val);
            }

            return val;
        }

        // Returns val run through the vertical scale factor when IsScaled() is true,
        // otherwise returns val unchanged.
        public int ScaleValueY(int val)
        {
            if (IsScaled())
            {
                return ScaledY(val);
            }

            return val;
        }

        // Picks one of the four convolve tables from the X/Y step values and invokes the
        // entry at [horiz][vert][avg] with the supplied source, destination and kernel.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public unsafe void InterPredict(
            int horiz,
            int vert,
            int avg,
            byte* src,
            int srcStride,
            byte* dst,
            int dstStride,
            int subpelX,
            int subpelY,
            int w,
            int h,
            Array8<short>[] kernel,
            int xs,
            int ys)
        {
            // A step of 16 means "no scaling" on that axis.
            bool unscaledX = XStepQ4 == 16;
            bool unscaledY = YStepQ4 == 16;

            var table = unscaledX
                ? (unscaledY ? PredictX16Y16 : PredictX16) // X unscaled: Y unscaled / Y scaled
                : (unscaledY ? PredictY16 : Predict);      // X scaled:   Y unscaled / Y scaled

            table[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h);
        }

        // High bit depth counterpart of InterPredict: same table selection, but on ushort
        // samples and with the bit depth forwarded as the last kernel argument.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public unsafe void HighbdInterPredict(
            int horiz,
            int vert,
            int avg,
            ushort* src,
            int srcStride,
            ushort* dst,
            int dstStride,
            int subpelX,
            int subpelY,
            int w,
            int h,
            Array8<short>[] kernel,
            int xs,
            int ys,
            int bd)
        {
            // A step of 16 means "no scaling" on that axis.
            bool unscaledX = XStepQ4 == 16;
            bool unscaledY = YStepQ4 == 16;

            var table = unscaledX
                ? (unscaledY ? HighbdPredictX16Y16 : HighbdPredictX16)
                : (unscaledY ? HighbdPredictY16 : HighbdPredict);

            table[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h, bd);
        }

        // Multiplies val by the horizontal fixed point factor in 64-bit arithmetic and
        // shifts the product back down by RefScaleShift.
        private int ScaledX(int val)
        {
            long product = (long)val * XScaleFP;

            return (int)(product >> RefScaleShift);
        }

        // Multiplies val by the vertical fixed point factor in 64-bit arithmetic and
        // shifts the product back down by RefScaleShift.
        private int ScaledY(int val)
        {
            long product = (long)val * YScaleFP;

            return (int)(product >> RefScaleShift);
        }

        private static int GetFixedPointScaleFactor(int otherSize, int thisSize)
        {
            // Calculate scaling factor once for each reference frame
            // and use fixed point scaling factors in decoding and encoding routines.
            // Hardware implementations can calculate scale factor in device driver
            // and use multiplication and shifting on hardware instead of division.
            int scaledOther = otherSize << RefScaleShift;

            return scaledOther / thisSize;
        }

        // Scales a motion vector and adds the sub-pixel part (masked with SubpelMask)
        // of the scaled block position (x, y) to each component.
        public Mv32 ScaleMv(ref Mv mv, int x, int y)
        {
            int fracX = ScaledX(x << SubpelBits) & SubpelMask;
            int fracY = ScaledY(y << SubpelBits) & SubpelMask;

            Mv32 scaled = new Mv32();
            scaled.Row = ScaledY(mv.Row) + fracY;
            scaled.Col = ScaledX(mv.Col) + fracX;

            return scaled;
        }

        // True when neither factor has been marked with RefInvalidScale.
        public bool IsValidScale()
        {
            if (XScaleFP == RefInvalidScale)
            {
                return false;
            }

            return YScaleFP != RefInvalidScale;
        }

        // True when the factors are valid and at least one of them differs from RefNoScale.
        public bool IsScaled()
        {
            return IsValidScale() && !(XScaleFP == RefNoScale && YScaleFP == RefNoScale);
        }

        // A reference is usable when this frame is at least half and at most sixteen times
        // the reference size, on both axes.
        public static bool ValidRefFrameSize(int refWidth, int refHeight, int thisWidth, int thisHeight)
        {
            bool widthOk = 2 * thisWidth >= refWidth && thisWidth <= 16 * refWidth;
            bool heightOk = 2 * thisHeight >= refHeight && thisHeight <= 16 * refHeight;

            return widthOk && heightOk;
        }

        // Computes the scale factors and step values for a reference of size
        // (otherW, otherH) used by a frame of size (thisW, thisH). When the size
        // combination is rejected by ValidRefFrameSize, both factors are set to
        // RefInvalidScale and the step values are left untouched.
        public void SetupScaleFactorsForFrame(int otherW, int otherH, int thisW, int thisH)
        {
            if (ValidRefFrameSize(otherW, otherH, thisW, thisH))
            {
                XScaleFP = GetFixedPointScaleFactor(otherW, thisW);
                YScaleFP = GetFixedPointScaleFactor(otherH, thisH);
                XStepQ4 = ScaledX(16);
                YStepQ4 = ScaledY(16);
            }
            else
            {
                XScaleFP = RefInvalidScale;
                YScaleFP = RefInvalidScale;
            }
        }
    }
}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs new file mode 100644 index 00000000..c3ea3fd8 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs @@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    // Per-segment feature identifiers. The values are used as indices into the
    // per-feature tables and as bit positions in Segmentation.FeatureMask.
    internal enum SegLvlFeatures
    {
        SegLvlAltQ = 0,     // Use alternate quantizer
        SegLvlAltLf = 1,    // Use alternate loop filter value
        SegLvlRefFrame = 2, // Optional segment reference frame
        SegLvlSkip = 3,     // Optional segment (0,0) + skip mode
        SegLvlMax = 4       // Number of features supported
    }
}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs new file mode 100644 index 00000000..53d1f2cc --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs @@ -0,0 +1,71 @@
using Ryujinx.Common.Memory;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    internal struct Segmentation
    {
        // Both tables are indexed by (int)SegLvlFeatures.
        private static readonly int[] SegFeatureDataSigned = { 1, 1, 0, 0 };
        private static readonly int[] SegFeatureDataMax = { QuantCommon.MaxQ, Vp9.LoopFilter.MaxLoopFilter, 3, 0 };

        public bool Enabled;
        public bool UpdateMap;
        public byte UpdateData;
        public byte AbsDelta;
        public bool TemporalUpdate;

        public Array8<Array4<short>> FeatureData; // [segment][feature]
        public Array8<uint> FeatureMask;          // One bit per feature, per segment
        public int AqAvOffset;

        // Looks up the probability at the index returned by xd.GetPredContextSegId().
        public static byte GetPredProbSegId(ref Array3<byte> segPredProbs, ref MacroBlockD xd)
        {
            int context = xd.GetPredContextSegId();

            return segPredProbs[context];
        }

        // Zeroes all 8 x 4 feature data entries, all 8 feature masks, and AqAvOffset.
        public void ClearAllSegFeatures()
        {
            MemoryMarshal.CreateSpan(ref FeatureData[0][0], 8 * 4).Clear();
            MemoryMarshal.CreateSpan(ref FeatureMask[0], 8).Clear();
            AqAvOffset = 0;
        }

        // Sets the mask bit of featureId for the given segment.
        internal void EnableSegFeature(int segmentId, SegLvlFeatures featureId)
        {
            uint featureBit = 1u << (int)featureId;

            FeatureMask[segmentId] |= featureBit;
        }

internal static int FeatureDataMax(SegLvlFeatures featureId)
        {
            // Table lookup keyed by the numeric value of the feature.
            int feature = (int)featureId;

            return SegFeatureDataMax[feature];
        }

        // Returns the SegFeatureDataSigned entry for the feature (non-zero means the
        // feature data may be negative, see the assertion in SetSegData).
        internal static int IsSegFeatureSigned(SegLvlFeatures featureId)
        {
            int feature = (int)featureId;

            return SegFeatureDataSigned[feature];
        }

        // Stores segData (truncated to short) for the given segment and feature.
        // Debug builds assert that the magnitude does not exceed the feature maximum
        // and that negative values are only stored for signed features.
        internal void SetSegData(int segmentId, SegLvlFeatures featureId, int segData)
        {
            int feature = (int)featureId;

            Debug.Assert(segData <= SegFeatureDataMax[feature]);

            if (segData < 0)
            {
                Debug.Assert(SegFeatureDataSigned[feature] != 0);
                Debug.Assert(-segData <= SegFeatureDataMax[feature]);
            }

            FeatureData[segmentId][feature] = (short)segData;
        }

        // Returns 1 when segmentation is enabled and the feature bit is set for the
        // segment, otherwise 0.
        internal int IsSegFeatureActive(int segmentId, SegLvlFeatures featureId)
        {
            if (!Enabled)
            {
                return 0;
            }

            uint featureBit = 1u << (int)featureId;

            return (FeatureMask[segmentId] & featureBit) != 0 ? 1 : 0;
        }

        // Reads back the value stored by SetSegData.
        internal short GetSegData(int segmentId, SegLvlFeatures featureId)
        {
            int feature = (int)featureId;

            return FeatureData[segmentId][feature];
        }
    }
}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs new file mode 100644 index 00000000..2b2a173e --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs @@ -0,0 +1,80 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
using System;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    internal struct Surface : ISurface
    {
        // Views over the three planes; each starts after the plane's border offset
        // (see the constructor).
        public ArrayPtr<byte> YBuffer;
        public ArrayPtr<byte> UBuffer;
        public ArrayPtr<byte> VBuffer;

        // Plane descriptors built from the buffer pointer and length on every access.
        public unsafe Plane YPlane => new Plane((IntPtr)YBuffer.ToPointer(), YBuffer.Length);
        public unsafe Plane UPlane => new Plane((IntPtr)UBuffer.ToPointer(), UBuffer.Length);
        public unsafe Plane VPlane => new Plane((IntPtr)VBuffer.ToPointer(), VBuffer.Length);

        public int Width { get; }
        public int Height { get; }
        public int AlignedWidth { get; }
        public int AlignedHeight { get; }
        public int Stride { get; }
        public int UvWidth { get; }
        public int UvHeight { get; }
        public int UvAlignedWidth { get; }
        public int UvAlignedHeight { get; }
public int UvStride { get; }
        public bool HighBd => false;

        // Base address of the single unmanaged allocation backing all three planes.
        private readonly IntPtr _pointer;

        // Allocates one unmanaged block holding the Y, U and V planes (in that order),
        // each surrounded by a border, and points the plane buffers just past the
        // top-left border of their plane.
        public Surface(int width, int height)
        {
            const int borderPixels = 32;
            const int subsampleX = 1;
            const int subsampleY = 1;
            const bool highBitDepth = false;

            // Luma geometry: dimensions rounded up to a multiple of 8, stride (including
            // both borders) rounded up to a multiple of 32.
            int lumaAlignedW = (width + 7) & ~7;
            int lumaAlignedH = (height + 7) & ~7;
            int lumaStride = (lumaAlignedW + 2 * borderPixels + 31) & ~31;
            int lumaBytes = (lumaAlignedH + 2 * borderPixels) * lumaStride;

            // Chroma geometry is the luma geometry shifted down by the subsampling.
            int chromaAlignedW = lumaAlignedW >> subsampleX;
            int chromaAlignedH = lumaAlignedH >> subsampleY;
            int chromaStride = lumaStride >> subsampleX;
            int chromaBorderW = borderPixels >> subsampleX;
            int chromaBorderH = borderPixels >> subsampleY;
            int chromaBytes = (chromaAlignedH + 2 * chromaBorderH) * chromaStride;

            int bytesPerSample = highBitDepth ? 2 : 1;
            int totalBytes = bytesPerSample * (lumaBytes + 2 * chromaBytes);

            IntPtr allocation = Marshal.AllocHGlobal(totalBytes);

            // Number of bytes skipped at the start of each plane so that the buffer
            // begins after the top border rows and the left border columns.
            int lumaSkip = (borderPixels * lumaStride) + borderPixels;
            int chromaSkip = (chromaBorderH * chromaStride) + chromaBorderW;
            int vPlaneStart = lumaBytes + chromaBytes;

            YBuffer = new ArrayPtr<byte>(allocation + lumaSkip, lumaBytes - lumaSkip);
            UBuffer = new ArrayPtr<byte>(allocation + lumaBytes + chromaSkip, chromaBytes - chromaSkip);
            VBuffer = new ArrayPtr<byte>(allocation + vPlaneStart + chromaSkip, chromaBytes - chromaSkip);

            _pointer = allocation;
            Width = width;
            Height = height;
            AlignedWidth = lumaAlignedW;
            AlignedHeight = lumaAlignedH;
            Stride = lumaStride;
            UvWidth = (width + subsampleX) >> subsampleX;
            UvHeight = (height + subsampleY) >> subsampleY;
            UvAlignedWidth = chromaAlignedW;
            UvAlignedHeight = chromaAlignedH;
            UvStride = chromaStride;
        }

        // Releases the unmanaged block allocated by the constructor.
        public void Dispose() => Marshal.FreeHGlobal(_pointer);
    }
}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs new file mode 100644 index 00000000..67289c47 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs @@ -0,0 +1,85 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using System;
using System.Diagnostics;

namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    internal struct TileInfo
    {
        private const int MinTileWidthB64 = 4;
private const int MaxTileWidthB64 = 64;

        // Tile bounds in mode info units; written by SetRow / SetCol.
        public int MiRowStart, MiRowEnd;
        public int MiColStart, MiColEnd;

        // Rounds a mode info count up using BitUtils.AlignPowerOfTwo with MiBlockSizeLog2.
        public static int MiColsAlignedToSb(int nMis) => BitUtils.AlignPowerOfTwo(nMis, Constants.MiBlockSizeLog2);

        // Start offset (in mode info units) of tile number idx when 'mis' units are
        // divided into (1 << log2) tiles, clamped to 'mis'.
        private static int GetTileOffset(int idx, int mis, int log2)
        {
            int blockCount = MiColsAlignedToSb(mis) >> Constants.MiBlockSizeLog2;
            int startBlock = (idx * blockCount) >> log2;
            int startMi = startBlock << Constants.MiBlockSizeLog2;

            return startMi < mis ? startMi : mis;
        }

        // Sets the row range to the span between tile 'row' and tile 'row + 1'.
        public void SetRow(ref Vp9Common cm, int row)
        {
            int rows = cm.MiRows;
            int log2Rows = cm.Log2TileRows;

            MiRowStart = GetTileOffset(row, rows, log2Rows);
            MiRowEnd = GetTileOffset(row + 1, rows, log2Rows);
        }

        // Sets the column range to the span between tile 'col' and tile 'col + 1'.
        public void SetCol(ref Vp9Common cm, int col)
        {
            int cols = cm.MiCols;
            int log2Cols = cm.Log2TileCols;

            MiColStart = GetTileOffset(col, cols, log2Cols);
            MiColEnd = GetTileOffset(col + 1, cols, log2Cols);
        }

        // Sets both the row and the column range for the tile at (row, col).
        public void Init(ref Vp9Common cm, int row, int col)
        {
            SetRow(ref cm, row);
            SetCol(ref cm, col);
        }

        // Checks that the given miRow, miCol and search point
        // are inside the borders of the tile.
public bool IsInside(int miCol, int miRow, int miRows, ref Position miPos)
        {
            // Rows are bounded by the frame (0 .. miRows), columns by this tile.
            return miRow + miPos.Row >= 0 &&
                   miCol + miPos.Col >= MiColStart &&
                   miRow + miPos.Row < miRows &&
                   miCol + miPos.Col < MiColEnd;
        }

        // Smallest log2 tile column count for which MaxTileWidthB64 << log2 is not
        // below sb64Cols.
        private static int GetMinLog2TileCols(int sb64Cols)
        {
            int log2 = 0;

            for (; (MaxTileWidthB64 << log2) < sb64Cols; ++log2)
            {
            }

            return log2;
        }

        // Largest log2 tile column count that keeps sb64Cols >> log2 at or above
        // MinTileWidthB64, or 0 when even a shift by one does not.
        private static int GetMaxLog2TileCols(int sb64Cols)
        {
            int log2 = 0;

            while ((sb64Cols >> (log2 + 1)) >= MinTileWidthB64)
            {
                ++log2;
            }

            return log2;
        }

        // Writes the minimum and maximum log2 tile column counts for a frame that is
        // miCols mode info units wide.
        public static void GetTileNBits(int miCols, ref int minLog2TileCols, ref int maxLog2TileCols)
        {
            int alignedCols = MiColsAlignedToSb(miCols);
            int sb64Cols = alignedCols >> Constants.MiBlockSizeLog2;

            minLog2TileCols = GetMinLog2TileCols(sb64Cols);
            maxLog2TileCols = GetMaxLog2TileCols(sb64Cols);

            Debug.Assert(minLog2TileCols <= maxLog2TileCols);
        }
    }
}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs new file mode 100644 index 00000000..db914525 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs @@ -0,0 +1,12 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    public enum TxMode
    {
        Only4X4 = 0,      // Only 4x4 transform used
        Allow8X8 = 1,     // Allow block transform size up to 8x8
        Allow16X16 = 2,   // Allow block transform size up to 16x16
        Allow32X32 = 3,   // Allow block transform size up to 32x32
        TxModeSelect = 4, // Transform specified for each block
        TxModes = 5       // Number of modes listed above
    }
}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs new file mode 100644 index 00000000..994deb2c --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs @@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    public enum TxSize
    {
        Tx4x4 = 0,   // 4x4 transform
        Tx8x8 = 1,   // 8x8 transform
        Tx16x16 = 2, // 16x16 transform
        Tx32x32 = 3, // 32x32 transform
        TxSizes = 4  // Number of sizes listed above
    }
}
diff --git
a/Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs new file mode 100644 index 00000000..dbf7251c --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs @@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    internal enum TxType
    {
        DctDct = 0,   // DCT in both horizontal and vertical
        AdstDct = 1,  // ADST in vertical, DCT in horizontal
        DctAdst = 2,  // DCT in vertical, ADST in horizontal
        AdstAdst = 3, // ADST in both directions
        TxTypes = 4   // Number of types listed above
    }
}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs new file mode 100644 index 00000000..0dafb820 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs @@ -0,0 +1,334 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Video;

namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    internal struct Vp9Common
    {
        public MacroBlockD Mb;

        public ArrayPtr<TileWorkerData> TileWorkerData;

        public InternalErrorInfo Error;

        // Frame size passed to SetupScaleFactorsForFrame as the "this" size.
        public int Width;
        public int Height;

        public int SubsamplingX;
        public int SubsamplingY;

        // Motion vector buffers, MiRows * MiCols entries each when allocated.
        public ArrayPtr<MvRef> PrevFrameMvs;
        public ArrayPtr<MvRef> CurFrameMvs;

        public Array3<RefBuffer> FrameRefs;

        public FrameType FrameType;

        // Flag signaling that the frame is encoded using only Intra modes.
        public bool IntraOnly;

        public bool AllowHighPrecisionMv;

        // MBs, MbRows/Cols is in 16-pixel units; MiRows/Cols is in
        // ModeInfo (8-pixel) units.
public int MBs;
        public int MbRows, MiRows;
        public int MbCols, MiCols;
        public int MiStride;

        // Profile settings
        public TxMode TxMode;

        public int BaseQindex;
        public int YDcDeltaQ;
        public int UvDcDeltaQ;
        public int UvAcDeltaQ;
        public Array8<Array2<short>> YDequant;  // [segment][0 = DC, 1 = AC]
        public Array8<Array2<short>> UvDequant; // [segment][0 = DC, 1 = AC]

        // We allocate a ModeInfo struct for each macroblock, together with
        // an extra row on top and column on the left to simplify prediction.
        public ArrayPtr<ModeInfo> Mip; // Base of allocated array
        public ArrayPtr<ModeInfo> Mi;  // Corresponds to upper left visible macroblock

        public ArrayPtr<Ptr<ModeInfo>> MiGridBase;
        public ArrayPtr<Ptr<ModeInfo>> MiGridVisible;

        // Whether to use previous frame's motion vectors for prediction.
        public bool UsePrevFrameMvs;

        // Persistent mb segment id map used in prediction.
        public int SegMapIdx;
        public int PrevSegMapIdx;

        public Array2<ArrayPtr<byte>> SegMapArray;
        public ArrayPtr<byte> LastFrameSegMap;
        public ArrayPtr<byte> CurrentFrameSegMap;

        public byte InterpFilter;

        public LoopFilterInfoN LfInfo;

        public Array4<sbyte> RefFrameSignBias; // Two state 0, 1

        public LoopFilter Lf;
        public Segmentation Seg;

        // Context probabilities for reference frame prediction
        public sbyte CompFixedRef;
        public Array2<sbyte> CompVarRef;
        public ReferenceMode ReferenceMode;

        public Ptr<Vp9EntropyProbs> Fc;
        public Ptr<Vp9BackwardUpdates> Counts;

        public bool FrameParallelDecodingMode;

        public int Log2TileCols, Log2TileRows;

        public ArrayPtr<sbyte> AboveSegContext;
        public ArrayPtr<sbyte> AboveContext;
        public int AboveContextAllocCols;

        // True for key frames and for frames flagged as intra only.
        public bool FrameIsIntraOnly()
        {
            if (FrameType == FrameType.KeyFrame)
            {
                return true;
            }

            return IntraOnly;
        }

        // True when any sign bias at index 2 .. RefsPerFrame differs from the one at index 1.
        public bool CompoundReferenceAllowed()
        {
            sbyte firstBias = RefFrameSignBias[1];

            for (int refIndex = 2; refIndex <= Constants.RefsPerFrame; ++refIndex)
            {
                if (RefFrameSignBias[refIndex] != firstBias)
                {
                    return true;
                }
            }

            return false;
        }

private static int CalcMiSize(int len)
        {
            // Len is in mi units.
            return len + Constants.MiBlockSize;
        }

        // Derives the mode info and macroblock dimensions from a frame size in pixels.
        public void SetMbMi(int width, int height)
        {
            int alignedWidth = BitUtils.AlignPowerOfTwo(width, Constants.MiSizeLog2);
            int alignedHeight = BitUtils.AlignPowerOfTwo(height, Constants.MiSizeLog2);

            MiCols = alignedWidth >> Constants.MiSizeLog2;
            MiRows = alignedHeight >> Constants.MiSizeLog2;
            MiStride = CalcMiSize(MiCols);

            // A macroblock covers 2x2 mode info units; round up.
            MbCols = (MiCols + 1) >> 1;
            MbRows = (MiRows + 1) >> 1;
            MBs = MbRows * MbCols;
        }

        // Allocates one TileWorkerData entry per tile.
        public void AllocTileWorkerData(MemoryAllocator allocator, int tileCols, int tileRows)
        {
            TileWorkerData = allocator.Allocate<TileWorkerData>(tileCols * tileRows);
        }

        // Returns the tile worker data to the allocator.
        public void FreeTileWorkerData(MemoryAllocator allocator)
        {
            allocator.Free(TileWorkerData);
        }

        // Allocates every segment map in SegMapArray and points the current/last
        // frame maps at entries 0 and 1.
        private void AllocSegMap(MemoryAllocator allocator, int segMapSize)
        {
            int i;

            for (i = 0; i < Constants.NumPingPongBuffers; ++i)
            {
                SegMapArray[i] = allocator.Allocate<byte>(segMapSize);
            }

            // Init the index.
            SegMapIdx = 0;
            PrevSegMapIdx = 1;

            CurrentFrameSegMap = SegMapArray[SegMapIdx];
            LastFrameSegMap = SegMapArray[PrevSegMapIdx];
        }

        // Frees every segment map and nulls all references to them.
        private void FreeSegMap(MemoryAllocator allocator)
        {
            int i;

            for (i = 0; i < Constants.NumPingPongBuffers; ++i)
            {
                allocator.Free(SegMapArray[i]);
                SegMapArray[i] = ArrayPtr<byte>.Null;
            }

            CurrentFrameSegMap = ArrayPtr<byte>.Null;
            LastFrameSegMap = ArrayPtr<byte>.Null;
        }

        // Allocates the mode info array and the matching grid of pointers.
        private void DecAllocMi(MemoryAllocator allocator, int miSize)
        {
            Mip = allocator.Allocate<ModeInfo>(miSize);
            MiGridBase = allocator.Allocate<Ptr<ModeInfo>>(miSize);
        }

        // Frees the mode info array and grid and nulls both.
        private void DecFreeMi(MemoryAllocator allocator)
        {
            allocator.Free(Mip);
            Mip = ArrayPtr<ModeInfo>.Null;
            allocator.Free(MiGridBase);
            MiGridBase = ArrayPtr<Ptr<ModeInfo>>.Null;
        }

        // Releases everything allocated by AllocContextBuffers and nulls the references.
        public void FreeContextBuffers(MemoryAllocator allocator)
        {
            DecFreeMi(allocator);
            FreeSegMap(allocator);
            allocator.Free(AboveContext);
            AboveContext = ArrayPtr<sbyte>.Null;
            allocator.Free(AboveSegContext);
            AboveSegContext = ArrayPtr<sbyte>.Null;
            allocator.Free(Lf.Lfm);
            Lf.Lfm = ArrayPtr<LoopFilterMask>.Null;
            allocator.Free(CurFrameMvs);
            CurFrameMvs = ArrayPtr<MvRef>.Null;

            // Decide from the buffer itself rather than from UsePrevFrameMvs: the flag
            // is a mutable public field, so if it changed after AllocContextBuffers ran
            // the buffer would otherwise never be handed back to the allocator.
            if (!PrevFrameMvs.IsNull)
            {
                allocator.Free(PrevFrameMvs);
                PrevFrameMvs = ArrayPtr<MvRef>.Null;
            }
        }

        private void AllocLoopFilter(MemoryAllocator allocator)
        {
            // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The
            // stride and rows are rounded up / truncated to a multiple of 8.
            Lf.LfmStride = (MiCols + (Constants.MiBlockSize - 1)) >> 3;
            Lf.Lfm = allocator.Allocate<LoopFilterMask>(((MiRows + (Constants.MiBlockSize - 1)) >> 3) * Lf.LfmStride);
        }

        // Recomputes the frame dimensions for (width, height) and allocates the
        // per-frame buffers sized from them. Buffers whose size would be zero are
        // not allocated.
        public void AllocContextBuffers(MemoryAllocator allocator, int width, int height)
        {
            SetMbMi(width, height);
            int newMiSize = MiStride * CalcMiSize(MiRows);
            if (newMiSize != 0)
            {
                DecAllocMi(allocator, newMiSize);
            }

            if (MiRows * MiCols != 0)
            {
                // Create the segmentation map structure and set to 0.
                AllocSegMap(allocator, MiRows * MiCols);
            }

            if (MiCols != 0)
            {
                AboveContext = allocator.Allocate<sbyte>(2 * TileInfo.MiColsAlignedToSb(MiCols) * Constants.MaxMbPlane);
                AboveSegContext = allocator.Allocate<sbyte>(TileInfo.MiColsAlignedToSb(MiCols));
            }

            AllocLoopFilter(allocator);

            CurFrameMvs = allocator.Allocate<MvRef>(MiRows * MiCols);
            // Using the same size as the current frame is fine here,
            // as this is never true when we have a resolution change.
            if (UsePrevFrameMvs)
            {
                PrevFrameMvs = allocator.Allocate<MvRef>(MiRows * MiCols);
            }
        }

        // Points Mi / MiGridVisible one row and one column into their base arrays
        // and fills the first MiStride * (MiRows + 1) grid entries with null pointers.
        private unsafe void DecSetupMi()
        {
            Mi = Mip.Slice(MiStride + 1);
            MiGridVisible = MiGridBase.Slice(MiStride + 1);
            MemoryUtil.Fill(MiGridBase.ToPointer(), Ptr<ModeInfo>.Null, MiStride * (MiRows + 1));
        }

        // Resets the mode info grid and, when present, zeroes the last frame segment map.
        public unsafe void InitContextBuffers()
        {
            DecSetupMi();
            if (!LastFrameSegMap.IsNull)
            {
                MemoryUtil.Fill(LastFrameSegMap.ToPointer(), (byte)0, MiRows * MiCols);
            }
        }

        // Selects the key frame or the regular partition probabilities (16 entries)
        // depending on FrameIsIntraOnly().
        private void SetPartitionProbs(ref MacroBlockD xd)
        {
            xd.PartitionProbs = FrameIsIntraOnly()
                ? new ArrayPtr<Array3<byte>>(ref Fc.Value.KfPartitionProb[0], 16)
                : new ArrayPtr<Array3<byte>>(ref Fc.Value.PartitionProb[0], 16);
        }

        // Wires a macroblock descriptor to this frame: per-plane coefficient buffer,
        // above context slice and dequant table, plus the shared contexts, stride,
        // error info and partition probabilities.
        internal void InitMacroBlockD(ref MacroBlockD xd, ArrayPtr<int> dqcoeff)
        {
            int i;

            for (i = 0; i < Constants.MaxMbPlane; ++i)
            {
                xd.Plane[i].DqCoeff = dqcoeff;
                // Each plane gets its own 2 * aligned-columns slice of AboveContext.
                xd.AboveContext[i] = AboveContext.Slice(i * 2 * TileInfo.MiColsAlignedToSb(MiCols));

                if (i == 0)
                {
                    MemoryUtil.Copy(ref xd.Plane[i].SegDequant, ref YDequant);
                }
                else
                {
                    MemoryUtil.Copy(ref xd.Plane[i].SegDequant, ref UvDequant);
                }
                xd.Fc = new Ptr<Vp9EntropyProbs>(ref Fc.Value);
            }

            xd.AboveSegContext = AboveSegContext;
            xd.MiStride = MiStride;
            xd.ErrorInfo = new Ptr<InternalErrorInfo>(ref Error);

            SetPartitionProbs(ref xd);
        }

        // Fills YDequant / UvDequant from the base quantizer index and the delta values.
        public void SetupSegmentationDequant()
        {
            const BitDepth bitDepth = BitDepth.Bits8; // TODO: Configurable
            // Build y/uv dequant values based on segmentation.
            if (Seg.Enabled)
            {
                int i;
                for (i = 0; i < Constants.MaxSegments; ++i)
                {
                    int qIndex = QuantCommon.GetQIndex(ref Seg, i, BaseQindex);
                    YDequant[i][0] = QuantCommon.DcQuant(qIndex, YDcDeltaQ, bitDepth);
                    YDequant[i][1] = QuantCommon.AcQuant(qIndex, 0, bitDepth);
                    UvDequant[i][0] = QuantCommon.DcQuant(qIndex, UvDcDeltaQ, bitDepth);
                    UvDequant[i][1] = QuantCommon.AcQuant(qIndex, UvAcDeltaQ, bitDepth);
                }
            }
            else
            {
                int qIndex = BaseQindex;
                // When segmentation is disabled, only the first value is used. The
                // remaining are don't cares.
                YDequant[0][0] = QuantCommon.DcQuant(qIndex, YDcDeltaQ, bitDepth);
                YDequant[0][1] = QuantCommon.AcQuant(qIndex, 0, bitDepth);
                UvDequant[0][0] = QuantCommon.DcQuant(qIndex, UvDcDeltaQ, bitDepth);
                UvDequant[0][1] = QuantCommon.AcQuant(qIndex, UvAcDeltaQ, bitDepth);
            }
        }

        // Recomputes the scale factors of every reference buffer against the
        // current frame size.
        public void SetupScaleFactors()
        {
            for (int i = 0; i < Constants.RefsPerFrame; ++i)
            {
                ref RefBuffer refBuf = ref FrameRefs[i];
                refBuf.Sf.SetupScaleFactorsForFrame(refBuf.Buf.Width, refBuf.Buf.Height, Width, Height);
            }
        }
    }
}
|
