From eb67a45ca82bc01ac843c853fd3c17f2a90e0250 Mon Sep 17 00:00:00 2001 From: ameerj Date: Mon, 26 Oct 2020 23:07:36 -0400 Subject: video_core: NVDEC Implementation This commit aims to implement the NVDEC (Nvidia Decoder) functionality, with video frame decoding being handled by the FFmpeg library. The process begins with Ioctl commands being sent to the NVDEC and VIC (Video Image Composer) emulated devices. These allocate the necessary GPU buffers for the frame data, along with providing information on the incoming video data. A Submit command then signals the GPU to process and decode the frame data. To decode the frame, the respective codec's header must be manually composed from the information provided by NVDEC, then sent with the raw frame data to the ffmpeg library. Currently, H264 and VP9 are supported, with VP9 having some minor artifacting issues related mainly to the reference frame composition in its uncompressed header. Async GPU is not properly implemented at the moment. Co-Authored-By: David <25727384+ogniK5377@users.noreply.github.com> --- src/video_core/command_classes/codecs/codec.cpp | 114 ++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 src/video_core/command_classes/codecs/codec.cpp (limited to 'src/video_core/command_classes/codecs/codec.cpp') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp new file mode 100644 index 000000000..2df410be8 --- /dev/null +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -0,0 +1,114 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include "common/assert.h" +#include "video_core/command_classes/codecs/codec.h" +#include "video_core/command_classes/codecs/h264.h" +#include "video_core/command_classes/codecs/vp9.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +extern "C" { +#include +} + +namespace Tegra { + +Codec::Codec(GPU& gpu_) + : gpu(gpu_), h264_decoder(std::make_unique(gpu)), + vp9_decoder(std::make_unique(gpu)) {} + +Codec::~Codec() { + if (!initialized) { + return; + } + // Free libav memory + avcodec_send_packet(av_codec_ctx, nullptr); + avcodec_receive_frame(av_codec_ctx, av_frame); + avcodec_flush_buffers(av_codec_ctx); + + av_frame_unref(av_frame); + av_free(av_frame); + avcodec_close(av_codec_ctx); +} + +void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { + LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast(codec)); + current_codec = codec; +} + +void Codec::StateWrite(u32 offset, u64 arguments) { + u8* const state_offset = reinterpret_cast(&state) + offset * sizeof(u64); + std::memcpy(state_offset, &arguments, sizeof(u64)); +} + +void Codec::Decode() { + bool is_first_frame = false; + + if (!initialized) { + if (current_codec == NvdecCommon::VideoCodec::H264) { + av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); + } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { + av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); + } else { + LOG_ERROR(Service_NVDRV, "Unknown video codec {}", static_cast(current_codec)); + return; + } + + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_frame = av_frame_alloc(); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); + + // TODO(ameerj): libavcodec gpu hw acceleration + + const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); + if (av_error < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); + av_frame_unref(av_frame); + av_free(av_frame); + avcodec_close(av_codec_ctx); + return; + } + initialized = true; + is_first_frame = true; + } + bool vp9_hidden_frame = false; + + AVPacket packet{}; + av_init_packet(&packet); + std::vector frame_data; + + if (current_codec == NvdecCommon::VideoCodec::H264) { + frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame); + } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { + frame_data = vp9_decoder->ComposeFrameHeader(state); + vp9_hidden_frame = vp9_decoder->WasFrameHidden(); + } + + packet.data = frame_data.data(); + packet.size = static_cast(frame_data.size()); + + avcodec_send_packet(av_codec_ctx, &packet); + + if (!vp9_hidden_frame) { + // Only receive/store visible frames + avcodec_receive_frame(av_codec_ctx, av_frame); + } +} + +AVFrame* Codec::GetCurrentFrame() { + return av_frame; +} + +const AVFrame* Codec::GetCurrentFrame() const { + return av_frame; +} + +NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { + return current_codec; +} + +} // namespace Tegra -- cgit v1.2.3 From c04203b786d87ecb66811341e7ea776452664e91 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 27 Oct 2020 02:09:17 -0400 Subject: nvdec: Tidy up header includes Prevents a few unnecessary inclusions. --- src/video_core/command_classes/codecs/codec.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/command_classes/codecs/codec.cpp') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 2df410be8..1adf3cd13 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -4,6 +4,7 @@ #include #include +#include #include "common/assert.h" #include "video_core/command_classes/codecs/codec.h" #include "video_core/command_classes/codecs/h264.h" -- cgit v1.2.3 From eab041866b7c766aa38258aecef8a00c03612459 Mon Sep 17 00:00:00 2001 From: ameerj Date: Wed, 25 Nov 2020 17:10:44 -0500 Subject: Queue decoded frames, cleanup decoders --- src/video_core/command_classes/codecs/codec.cpp | 30 ++++++++++++++++--------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'src/video_core/command_classes/codecs/codec.cpp') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 1adf3cd13..1a19341c8 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -18,6 +18,11 @@ extern "C" { namespace Tegra { +void av_frame_deleter(AVFrame* ptr) { + av_frame_unref(ptr); + av_free(ptr); +} + Codec::Codec(GPU& gpu_) : gpu(gpu_), h264_decoder(std::make_unique(gpu)), vp9_decoder(std::make_unique(gpu)) {} @@ -27,7 +32,9 @@ Codec::~Codec() { return; } // Free libav memory + AVFrame* av_frame{nullptr}; avcodec_send_packet(av_codec_ctx, nullptr); + av_frame = av_frame_alloc(); avcodec_receive_frame(av_codec_ctx, av_frame); avcodec_flush_buffers(av_codec_ctx); @@ -60,7 +67,7 @@ void Codec::Decode() { } av_codec_ctx = avcodec_alloc_context3(av_codec); - av_frame = av_frame_alloc(); + av_codec_ctx->refcounted_frames = 1; av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); // TODO(ameerj): libavcodec gpu hw acceleration @@ -68,8 +75,6 @@ void Codec::Decode() { const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); if (av_error < 0) { LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); - av_frame_unref(av_frame); - av_free(av_frame); avcodec_close(av_codec_ctx); return; } @@ -96,16 +101,21 @@ void Codec::Decode() { if (!vp9_hidden_frame) { // Only receive/store visible frames - avcodec_receive_frame(av_codec_ctx, av_frame); + AVFramePtr frame = AVFramePtr{av_frame_alloc(), av_frame_deleter}; + avcodec_receive_frame(av_codec_ctx, frame.get()); + av_frames.push(std::move(frame)); } } -AVFrame* Codec::GetCurrentFrame() { - return av_frame; -} - -const AVFrame* Codec::GetCurrentFrame() const { - return av_frame; +AVFramePtr Codec::GetCurrentFrame() { + // Sometimes VIC will request more frames than have been decoded. + // in this case, return a nullptr and don't overwrite previous frame data + if (av_frames.size() > 0) { + AVFramePtr frame = std::move(av_frames.front()); + av_frames.pop(); + return frame; + } + return AVFramePtr{nullptr, av_frame_deleter}; } NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { -- cgit v1.2.3 From c9e3abe2060760d71c83a1574559b6e479e637d2 Mon Sep 17 00:00:00 2001 From: ameerj Date: Thu, 26 Nov 2020 00:18:26 -0500 Subject: Address PR feedback remove some redundant moves, make deleter match naming guidelines. Co-Authored-By: LC <712067+lioncash@users.noreply.github.com> --- src/video_core/command_classes/codecs/codec.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/video_core/command_classes/codecs/codec.cpp') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 1a19341c8..412e1e41c 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -18,7 +18,7 @@ extern "C" { namespace Tegra { -void av_frame_deleter(AVFrame* ptr) { +void AVFrameDeleter(AVFrame* ptr) { av_frame_unref(ptr); av_free(ptr); } @@ -101,7 +101,7 @@ void Codec::Decode() { if (!vp9_hidden_frame) { // Only receive/store visible frames - AVFramePtr frame = AVFramePtr{av_frame_alloc(), av_frame_deleter}; + AVFramePtr frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; avcodec_receive_frame(av_codec_ctx, frame.get()); av_frames.push(std::move(frame)); } @@ -110,12 +110,13 @@ void Codec::Decode() { AVFramePtr Codec::GetCurrentFrame() { // Sometimes VIC will request more frames than have been decoded. // in this case, return a nullptr and don't overwrite previous frame data - if (av_frames.size() > 0) { - AVFramePtr frame = std::move(av_frames.front()); - av_frames.pop(); - return frame; + if (av_frames.empty()) { + return AVFramePtr{nullptr, AVFrameDeleter}; } - return AVFramePtr{nullptr, av_frame_deleter}; + + AVFramePtr frame = std::move(av_frames.front()); + av_frames.pop(); + return frame; } NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { -- cgit v1.2.3 From 979b60273889f070737d1fe3037991245180ca67 Mon Sep 17 00:00:00 2001 From: ameerj Date: Thu, 26 Nov 2020 14:04:06 -0500 Subject: Limit queue size to 10 frames Workaround for ZLA, which seems to decode and queue twice as many frames as it displays. --- src/video_core/command_classes/codecs/codec.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core/command_classes/codecs/codec.cpp') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 412e1e41c..9a88f64e4 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -104,6 +104,10 @@ void Codec::Decode() { AVFramePtr frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; avcodec_receive_frame(av_codec_ctx, frame.get()); av_frames.push(std::move(frame)); + // Limit queue to 10 frames. Workaround for ZLA decode and queue spam + if (av_frames.size() > 10) { + av_frames.pop(); + } } } -- cgit v1.2.3 From 94af77aa7c26b65365fecc6230c2110c10ff16b5 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 4 Dec 2020 16:23:10 -0500 Subject: codec: Remove deprecated usage of AVCodecContext::refcounted_frames This was only necessary for use with the avcodec_decode_video2/avcoded_decode_audio4 APIs which are also deprecated. Given we use avcodec_send_packet/avcodec_receive_frame, this isn't necessary, this is even indicated directly within the FFmpeg API changes document here on 2017-09-26: https://github.com/FFmpeg/FFmpeg/blob/master/doc/APIchanges#L410 This prevents our code from breaking whenever we update to a newer version of FFmpeg in the future if they ever decide to fully remove this API member. --- src/video_core/command_classes/codecs/codec.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src/video_core/command_classes/codecs/codec.cpp') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 9a88f64e4..f547f5bd4 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -67,7 +67,6 @@ void Codec::Decode() { } av_codec_ctx = avcodec_alloc_context3(av_codec); - av_codec_ctx->refcounted_frames = 1; av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); // TODO(ameerj): libavcodec gpu hw acceleration -- cgit v1.2.3 From 4c5f5c9bf301d3626df104dbed6fed6f115cedc8 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 7 Dec 2020 00:41:47 -0500 Subject: video_core: Remove unnecessary enum class casting in logging messages fmt now automatically prints the numeric value of an enum class member by default, so we don't need to use casts any more. Reduces the line noise a bit. --- src/video_core/command_classes/codecs/codec.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/command_classes/codecs/codec.cpp') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index f547f5bd4..39bc923a5 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -44,7 +44,7 @@ Codec::~Codec() { } void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { - LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast(codec)); + LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec); current_codec = codec; } @@ -62,7 +62,7 @@ void Codec::Decode() { } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); } else { - LOG_ERROR(Service_NVDRV, "Unknown video codec {}", static_cast(current_codec)); + LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec); return; } -- cgit v1.2.3