4 files changed, 111 insertions, 14 deletions
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index 42e7d6e4f..cd6a3a9b8 100644
--- a/src/video_core/host1x/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -5,6 +5,7 @@
 #include <fstream>
 #include <vector>
 #include "common/assert.h"
+#include "common/scope_exit.h"
 #include "common/settings.h"
 #include "video_core/host1x/codecs/codec.h"
 #include "video_core/host1x/codecs/h264.h"
@@ -14,6 +15,8 @@
 #include "video_core/memory_manager.h"
 
 extern "C" {
+#include <libavfilter/buffersink.h>
+#include <libavfilter/buffersrc.h>
 #include <libavutil/opt.h>
 #ifdef LIBVA_FOUND
 // for querying VAAPI driver information
@@ -85,6 +88,10 @@ Codec::~Codec() {
     // Free libav memory
     avcodec_free_context(&av_codec_ctx);
     av_buffer_unref(&av_gpu_decoder);
+
+    if (filters_initialized) {
+        avfilter_graph_free(&av_filter_graph);
+    }
 }
 
 bool Codec::CreateGpuAvDevice() {
@@ -152,6 +159,8 @@ bool Codec::CreateGpuAvDevice() {
 void Codec::InitializeAvCodecContext() {
     av_codec_ctx = avcodec_alloc_context3(av_codec);
     av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
+    av_codec_ctx->thread_count = 0; // NOTE(review): 0 presumably lets FFmpeg pick - confirm
+    av_codec_ctx->thread_type &= ~FF_THREAD_FRAME; // clear the frame-threading flag bit
 }
 
 void Codec::InitializeGpuDecoder() {
@@ -165,6 +174,85 @@ void Codec::InitializeGpuDecoder() {
     av_codec_ctx->get_format = GetGpuFormat;
 }
 
+/// Builds the deinterlacing filter graph: "buffer" source -> yadif -> "buffersink" sink.
+/// The source is configured from the size and pixel format of `frame`. On success
+/// filters_initialized is set. On failure the error is logged, the partially built graph is
+/// released and the filter members are reset to null, so the call can safely be retried.
+void Codec::InitializeAvFilters(AVFrame* frame) {
+    const AVFilter* buffer_src = avfilter_get_by_name("buffer");
+    const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
+    AVFilterInOut* inputs = avfilter_inout_alloc();
+    AVFilterInOut* outputs = avfilter_inout_alloc();
+    SCOPE_EXIT({
+        avfilter_inout_free(&inputs);
+        avfilter_inout_free(&outputs);
+        if (!filters_initialized) {
+            // The graph owns its filter contexts; freeing it invalidates both pointers.
+            avfilter_graph_free(&av_filter_graph);
+            av_filter_src_ctx = nullptr;
+            av_filter_sink_ctx = nullptr;
+        }
+    });
+
+    if (!buffer_src || !buffer_sink || !inputs || !outputs) {
+        LOG_ERROR(Service_NVDRV, "Failed to look up or allocate libavfilter objects");
+        return;
+    }
+
+    // Don't know how to get the accurate time_base but it doesn't matter for yadif filter
+    // so just use 1/1 to make buffer filter happy
+    const std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1",
+                                         frame->width, frame->height, frame->format);
+
+    av_filter_graph = avfilter_graph_alloc();
+    if (av_filter_graph == nullptr) {
+        LOG_ERROR(Service_NVDRV, "avfilter_graph_alloc failed");
+        return;
+    }
+
+    int ret = avfilter_graph_create_filter(&av_filter_src_ctx, buffer_src, "in", args.c_str(),
+                                           nullptr, av_filter_graph);
+    if (ret < 0) {
+        LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter source error: {}", ret);
+        return;
+    }
+
+    ret = avfilter_graph_create_filter(&av_filter_sink_ctx, buffer_sink, "out", nullptr, nullptr,
+                                       av_filter_graph);
+    if (ret < 0) {
+        LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter sink error: {}", ret);
+        return;
+    }
+
+    // Endpoints for the parsed chain: its output feeds our sink ("out") ...
+    inputs->name = av_strdup("out");
+    inputs->filter_ctx = av_filter_sink_ctx;
+    inputs->pad_idx = 0;
+    inputs->next = nullptr;
+
+    // ... and its input is fed by our source ("in").
+    outputs->name = av_strdup("in");
+    outputs->filter_ctx = av_filter_src_ctx;
+    outputs->pad_idx = 0;
+    outputs->next = nullptr;
+
+    // yadif options are mode:parity:deint (see the FFmpeg yadif filter documentation).
+    const char* description = "yadif=1:-1:0";
+    ret = avfilter_graph_parse_ptr(av_filter_graph, description, &inputs, &outputs, nullptr);
+    if (ret < 0) {
+        LOG_ERROR(Service_NVDRV, "avfilter_graph_parse_ptr error: {}", ret);
+        return;
+    }
+
+    ret = avfilter_graph_config(av_filter_graph, nullptr);
+    if (ret < 0) {
+        LOG_ERROR(Service_NVDRV, "avfilter_graph_config error: {}", ret);
+        return;
+    }
+
+    filters_initialized = true;
+}
+
 void Codec::Initialize() {
     const AVCodecID codec = [&] {
         switch (current_codec) {
@@ -269,8 +334,40 @@ void Codec::Decode() {
         UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
         return;
     }
-    av_frames.push(std::move(final_frame));
-    if (av_frames.size() > 10) {
+    if (final_frame->interlaced_frame && !filters_initialized) {
+        InitializeAvFilters(final_frame.get());
+    }
+    if (!final_frame->interlaced_frame || !filters_initialized) {
+        // Progressive frame, or the deinterlace filters could not be set up (in which case
+        // the filter contexts must not be used): queue the decoded frame unmodified.
+        av_frames.push(std::move(final_frame));
+    } else {
+        if (const int ret = av_buffersrc_add_frame_flags(av_filter_src_ctx, final_frame.get(),
+                                                         AV_BUFFERSRC_FLAG_KEEP_REF);
+            ret) {
+            LOG_DEBUG(Service_NVDRV, "av_buffersrc_add_frame_flags error {}", ret);
+            return;
+        }
+        // Drain every frame the sink has ready; there may be more than one per input frame.
+        while (true) {
+            auto filter_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
+
+            const int ret = av_buffersink_get_frame(av_filter_sink_ctx, filter_frame.get());
+
+            // AVERROR_EOF is already negative; wrapping it in AVERROR() would flip its sign.
+            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
+                break;
+            }
+            if (ret < 0) {
+                LOG_DEBUG(Service_NVDRV, "av_buffersink_get_frame error {}", ret);
+                return;
+            }
+
+            av_frames.push(std::move(filter_frame));
+        }
+    }
+    // Bound the queue; a loop is needed because several frames may have been pushed above.
+    while (av_frames.size() > 10) {
         LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
         av_frames.pop();
     }
diff --git a/src/video_core/host1x/codecs/codec.h b/src/video_core/host1x/codecs/codec.h
index 0d45fb7fe..06fe00a4b 100644
--- a/src/video_core/host1x/codecs/codec.h
+++ b/src/video_core/host1x/codecs/codec.h
@@ -15,6 +15,7 @@ extern "C" {
 #pragma GCC diagnostic ignored "-Wconversion"
 #endif
 #include <libavcodec/avcodec.h>
+#include <libavfilter/avfilter.h>
 #if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
 #endif
@@ -61,17 +62,24 @@ public:
 private:
     void InitializeAvCodecContext();
 
+    void InitializeAvFilters(AVFrame* frame);
+
     void InitializeGpuDecoder();
 
     bool CreateGpuAvDevice();
 
     bool initialized{};
+    bool filters_initialized{};
     Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
 
     const AVCodec* av_codec{nullptr};
     AVCodecContext* av_codec_ctx{nullptr};
     AVBufferRef* av_gpu_decoder{nullptr};
 
+    AVFilterContext* av_filter_src_ctx{nullptr};
+    AVFilterContext* av_filter_sink_ctx{nullptr};
+    AVFilterGraph* av_filter_graph{nullptr};
+
     Host1x::Host1x& host1x;
     const Host1x::NvdecCommon::NvdecRegisters& state;
     std::unique_ptr<Decoder::H264> h264_decoder;
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index e87bd65fa..ce827eb6c 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -4,6 +4,7 @@
 #include <array>
 #include <bit>
 
+#include "common/scratch_buffer.h"
 #include "common/settings.h"
 #include "video_core/host1x/codecs/h264.h"
 #include "video_core/host1x/host1x.h"
@@ -111,7 +112,7 @@ const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegist
     writer.WriteUe(0);
 
     writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
-    writer.WriteBit(false);
+    writer.WriteBit(context.h264_parameter_set.pic_order_present_flag != 0);
     writer.WriteUe(0);
     writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
     writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
@@ -129,7 +130,7 @@ const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegist
     writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
     writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
 
-    writer.WriteBit(true);
+    writer.WriteBit(true); // pic_scaling_matrix_present_flag
 
     for (s32 index = 0; index < 6; index++) {
         writer.WriteBit(true);
@@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) {
 }
 
 void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
-    std::vector<u8> scan(count);
+    static Common::ScratchBuffer<u8> scan{};
+    scan.resize_destructive(count);
     if (count == 16) {
         std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
     } else {
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp
index 36a04e4e0..10d7ef884 100644
--- a/src/video_core/host1x/vic.cpp
+++ b/src/video_core/host1x/vic.cpp
@@ -189,9 +189,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
     for (std::size_t y = 0; y < frame_height; ++y) {
         const std::size_t src = y * stride;
         const std::size_t dst = y * aligned_width;
-        for (std::size_t x = 0; x < frame_width; ++x) {
-            luma_buffer[dst + x] = luma_src[src + x];
-        }
+        std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width);
     }
     host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
                                       luma_buffer.size());
@@ -205,15 +203,15 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
         // Frame from FFmpeg software
         // Populate chroma buffer from both channels with interleaving.
         const std::size_t half_width = frame_width / 2;
+        u8* chroma_buffer_data = chroma_buffer.data();
         const u8* chroma_b_src = frame->data[1];
         const u8* chroma_r_src = frame->data[2];
         for (std::size_t y = 0; y < half_height; ++y) {
             const std::size_t src = y * half_stride;
             const std::size_t dst = y * aligned_width;
-
             for (std::size_t x = 0; x < half_width; ++x) {
-                chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
-                chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
+                chroma_buffer_data[dst + x * 2] = chroma_b_src[src + x];
+                chroma_buffer_data[dst + x * 2 + 1] = chroma_r_src[src + x];
             }
         }
         break;
@@ -225,9 +223,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
         for (std::size_t y = 0; y < half_height; ++y) {
             const std::size_t src = y * stride;
             const std::size_t dst = y * aligned_width;
-            for (std::size_t x = 0; x < frame_width; ++x) {
-                chroma_buffer[dst + x] = chroma_src[src + x];
-            }
+            std::memcpy(chroma_buffer.data() + dst, chroma_src + src, frame_width);
         }
         break;
     }