From eb67a45ca82bc01ac843c853fd3c17f2a90e0250 Mon Sep 17 00:00:00 2001 From: ameerj Date: Mon, 26 Oct 2020 23:07:36 -0400 Subject: video_core: NVDEC Implementation This commit aims to implement the NVDEC (Nvidia Decoder) functionality, with video frame decoding being handled by the FFmpeg library. The process begins with Ioctl commands being sent to the NVDEC and VIC (Video Image Composer) emulated devices. These allocate the necessary GPU buffers for the frame data, along with providing information on the incoming video data. A Submit command then signals the GPU to process and decode the frame data. To decode the frame, the respective codec's header must be manually composed from the information provided by NVDEC, then sent with the raw frame data to the ffmpeg library. Currently, H264 and VP9 are supported, with VP9 having some minor artifacting issues related mainly to the reference frame composition in its uncompressed header. Async GPU is not properly implemented at the moment. Co-Authored-By: David <25727384+ogniK5377@users.noreply.github.com> --- src/video_core/command_classes/vic.cpp | 180 +++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 src/video_core/command_classes/vic.cpp (limited to 'src/video_core/command_classes/vic.cpp') diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp new file mode 100644 index 000000000..66e15a1a8 --- /dev/null +++ b/src/video_core/command_classes/vic.cpp @@ -0,0 +1,180 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include "common/assert.h" +#include "video_core/command_classes/nvdec.h" +#include "video_core/command_classes/vic.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/texture_cache/surface_params.h" + +extern "C" { +#include +} + +namespace Tegra { + +Vic::Vic(GPU& gpu_, std::shared_ptr nvdec_processor_) + : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {} +Vic::~Vic() = default; + +void Vic::VicStateWrite(u32 offset, u32 arguments) { + u8* const state_offset = reinterpret_cast(&vic_state) + offset * sizeof(u32); + std::memcpy(state_offset, &arguments, sizeof(u32)); +} + +void Vic::ProcessMethod(Vic::Method method, const std::vector& arguments) { + LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast(method)); + VicStateWrite(static_cast(method), arguments[0]); + const u64 arg = static_cast(arguments[0]) << 8; + switch (method) { + case Method::Execute: + Execute(); + break; + case Method::SetConfigStructOffset: + config_struct_address = arg; + break; + case Method::SetOutputSurfaceLumaOffset: + output_surface_luma_address = arg; + break; + case Method::SetOutputSurfaceChromaUOffset: + output_surface_chroma_u_address = arg; + break; + case Method::SetOutputSurfaceChromaVOffset: + output_surface_chroma_v_address = arg; + break; + default: + break; + } +} + +void Vic::Execute() { + if (output_surface_luma_address == 0) { + LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}", + vic_state.output_surface.luma_offset); + return; + } + const VicConfig config{gpu.MemoryManager().Read(config_struct_address + 0x20)}; + const VideoPixelFormat pixel_format = + static_cast(config.pixel_format.Value()); + switch (pixel_format) { + case VideoPixelFormat::BGRA8: + case VideoPixelFormat::RGBA8: { + LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); + const auto* frame = nvdec_processor->GetFrame(); + + if (!frame || frame->width == 0 || frame->height == 0) { + return; + } + if (scaler_ctx == nullptr || frame->width != scaler_width || + frame->height != scaler_height) { + const AVPixelFormat target_format = + (pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA; + + sws_freeContext(scaler_ctx); + scaler_ctx = nullptr; + + // FFmpeg returns all frames in YUV420, convert it into expected format + scaler_ctx = + sws_getContext(frame->width, frame->height, AV_PIX_FMT_YUV420P, frame->width, + frame->height, target_format, 0, nullptr, nullptr, nullptr); + + scaler_width = frame->width; + scaler_height = frame->height; + } + // Get Converted frame + const std::size_t linear_size = frame->width * frame->height * 4; + + using AVMallocPtr = std::unique_ptr; + AVMallocPtr converted_frame_buffer{static_cast(av_malloc(linear_size)), av_free}; + + const int converted_stride{frame->width * 4}; + u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; + + sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, + &converted_frame_buf_addr, &converted_stride); + + const u32 blk_kind = static_cast(config.block_linear_kind); + if (blk_kind != 0) { + // swizzle pitch linear to block linear + const u32 block_height = static_cast(config.block_linear_height_log2); + const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, + block_height, 0); + std::vector swizzled_data(size); + Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4, + swizzled_data.data(), converted_frame_buffer.get(), + false, block_height, 0, 1); + + gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); + gpu.Maxwell3D().OnMemoryWrite(); + } else { + // send pitch linear frame + gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, + linear_size); + gpu.Maxwell3D().OnMemoryWrite(); + } + break; + } + case VideoPixelFormat::Yuv420: { + LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); + + const auto* frame = nvdec_processor->GetFrame(); + + if (!frame || frame->width == 0 || frame->height == 0) { + return; + } + + const std::size_t surface_width = config.surface_width_minus1 + 1; + const std::size_t surface_height = config.surface_height_minus1 + 1; + const std::size_t half_width = surface_width / 2; + const std::size_t half_height = config.surface_height_minus1 / 2; + const std::size_t aligned_width = (surface_width + 0xff) & ~0xff; + + const auto* luma_ptr = frame->data[0]; + const auto* chroma_b_ptr = frame->data[1]; + const auto* chroma_r_ptr = frame->data[2]; + const auto stride = frame->linesize[0]; + const auto half_stride = frame->linesize[1]; + + std::vector luma_buffer(aligned_width * surface_height); + std::vector chroma_buffer(aligned_width * half_height); + + // Populate luma buffer + for (std::size_t y = 0; y < surface_height - 1; ++y) { + std::size_t src = y * stride; + std::size_t dst = y * aligned_width; + + std::size_t size = surface_width; + + for (std::size_t offset = 0; offset < size; ++offset) { + luma_buffer[dst + offset] = luma_ptr[src + offset]; + } + } + gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), + luma_buffer.size()); + + // Populate chroma buffer from both channels with interleaving. + for (std::size_t y = 0; y < half_height; ++y) { + std::size_t src = y * half_stride; + std::size_t dst = y * aligned_width; + + for (std::size_t x = 0; x < half_width; ++x) { + chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x]; + chroma_buffer[dst + x * 2 + 1] = chroma_r_ptr[src + x]; + } + } + gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(), + chroma_buffer.size()); + gpu.Maxwell3D().OnMemoryWrite(); + break; + } + default: + UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value()); + break; + } +} + +} // namespace Tegra -- cgit v1.2.3 From c04203b786d87ecb66811341e7ea776452664e91 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 27 Oct 2020 02:09:17 -0400 Subject: nvdec: Tidy up header includes Prevents a few unnecessary inclusions. --- src/video_core/command_classes/vic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/command_classes/vic.cpp') diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 66e15a1a8..5b52da277 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -26,7 +26,7 @@ void Vic::VicStateWrite(u32 offset, u32 arguments) { std::memcpy(state_offset, &arguments, sizeof(u32)); } -void Vic::ProcessMethod(Vic::Method method, const std::vector& arguments) { +void Vic::ProcessMethod(Method method, const std::vector& arguments) { LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast(method)); VicStateWrite(static_cast(method), arguments[0]); const u64 arg = static_cast(arguments[0]) << 8; -- cgit v1.2.3 From eab041866b7c766aa38258aecef8a00c03612459 Mon Sep 17 00:00:00 2001 From: ameerj Date: Wed, 25 Nov 2020 17:10:44 -0500 Subject: Queue decoded frames, cleanup decoders --- src/video_core/command_classes/vic.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'src/video_core/command_classes/vic.cpp') diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 5b52da277..248443027 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -58,17 +58,18 @@ void Vic::Execute() { return; } const VicConfig config{gpu.MemoryManager().Read(config_struct_address + 0x20)}; + const AVFramePtr frame_ptr = std::move(nvdec_processor->GetFrame()); + const auto* frame = frame_ptr.get(); + if (!frame || frame->width == 0 || frame->height == 0) { + return; + } const VideoPixelFormat pixel_format = static_cast(config.pixel_format.Value()); switch (pixel_format) { case VideoPixelFormat::BGRA8: case VideoPixelFormat::RGBA8: { LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); - const auto* frame = nvdec_processor->GetFrame(); - if (!frame || frame->width == 0 || frame->height == 0) { - return; - } if (scaler_ctx == nullptr || frame->width != scaler_width || frame->height != scaler_height) { const AVPixelFormat target_format = @@ -121,12 +122,6 @@ void Vic::Execute() { case VideoPixelFormat::Yuv420: { LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); - const auto* frame = nvdec_processor->GetFrame(); - - if (!frame || frame->width == 0 || frame->height == 0) { - return; - } - const std::size_t surface_width = config.surface_width_minus1 + 1; const std::size_t surface_height = config.surface_height_minus1 + 1; const std::size_t half_width = surface_width / 2; -- cgit v1.2.3 From cf9767c608dfb49b77708966d8d07354930d150c Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 3 Dec 2020 12:33:05 -0500 Subject: vp9/vic: Resolve pessimizing moves Removes the usage of moves that don't result in behavior different from a copy, or otherwise would prevent copy elision from occurring. --- src/video_core/command_classes/vic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/command_classes/vic.cpp') diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 248443027..6cfc193fa 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -58,7 +58,7 @@ void Vic::Execute() { return; } const VicConfig config{gpu.MemoryManager().Read(config_struct_address + 0x20)}; - const AVFramePtr frame_ptr = std::move(nvdec_processor->GetFrame()); + const AVFramePtr frame_ptr = nvdec_processor->GetFrame(); const auto* frame = frame_ptr.get(); if (!frame || frame->width == 0 || frame->height == 0) { return; -- cgit v1.2.3 From 4c5f5c9bf301d3626df104dbed6fed6f115cedc8 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 7 Dec 2020 00:41:47 -0500 Subject: video_core: Remove unnecessary enum class casting in logging messages fmt now automatically prints the numeric value of an enum class member by default, so we don't need to use casts any more. Reduces the line noise a bit. --- src/video_core/command_classes/vic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/command_classes/vic.cpp') diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 6cfc193fa..66e21ce9c 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -27,7 +27,7 @@ void Vic::VicStateWrite(u32 offset, u32 arguments) { } void Vic::ProcessMethod(Method method, const std::vector& arguments) { - LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast(method)); + LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", method); VicStateWrite(static_cast(method), arguments[0]); const u64 arg = static_cast(arguments[0]) << 8; switch (method) { -- cgit v1.2.3 From 9764c13d6d2977903f407761b27d847c0056e1c4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:25:23 -0300 Subject: video_core: Rewrite the texture cache The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage.The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage. This commit aims to address those issues. --- src/video_core/command_classes/vic.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core/command_classes/vic.cpp') diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 66e21ce9c..aa8c9f9de 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -9,7 +9,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" -#include "video_core/texture_cache/surface_params.h" +#include "video_core/textures/decoders.h" extern "C" { #include @@ -105,9 +105,9 @@ void Vic::Execute() { const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, block_height, 0); std::vector swizzled_data(size); - Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4, - swizzled_data.data(), converted_frame_buffer.get(), - false, block_height, 0, 1); + Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4, + frame->width, 4, swizzled_data.data(), + converted_frame_buffer.get(), block_height, 0, 0); gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); gpu.Maxwell3D().OnMemoryWrite(); -- cgit v1.2.3 From a745d87971b2c9795e1b2c587bfe30b849b522fa Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 2 Jan 2021 09:00:05 -0500 Subject: general: Fix various spelling errors --- src/video_core/command_classes/vic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/command_classes/vic.cpp') diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index aa8c9f9de..55e632346 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -53,7 +53,7 @@ void Vic::ProcessMethod(Method method, const std::vector& arguments) { void Vic::Execute() { if (output_surface_luma_address == 0) { - LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}", + LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}", vic_state.output_surface.luma_offset); return; } -- cgit v1.2.3