26 files changed, 361 insertions, 246 deletions
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index d774db107..b60f86260 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -144,7 +144,7 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) {
         }
         case ThiMethod::SetMethod1:
             LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
-                      static_cast<u32>(vic_thi_state.method_0));
+                      static_cast<u32>(vic_thi_state.method_0), data);
             vic_processor->ProcessMethod(static_cast<Tegra::Vic::Method>(vic_thi_state.method_0),
                                          {data});
             break;
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 2df410be8..1adf3cd13 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -4,6 +4,7 @@
 
 #include <cstring>
 #include <fstream>
+#include <vector>
 #include "common/assert.h"
 #include "video_core/command_classes/codecs/codec.h"
 #include "video_core/command_classes/codecs/h264.h"
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 2e56daf29..5bbe6a332 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -5,8 +5,6 @@
 #pragma once
 
 #include <memory>
-#include <vector>
-#include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/command_classes/nvdec_common.h"
 
@@ -44,11 +42,11 @@ public:
     void Decode();
 
     /// Returns most recently decoded frame
-    AVFrame* GetCurrentFrame();
-    const AVFrame* GetCurrentFrame() const;
+    [[nodiscard]] AVFrame* GetCurrentFrame();
+    [[nodiscard]] const AVFrame* GetCurrentFrame() const;
 
     /// Returns the value of current_codec
-    NvdecCommon::VideoCodec GetCurrentCodec() const;
+    [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
 
 private:
     bool initialized{};
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index 1a39f7b23..33e063e20 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -18,17 +18,33 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 //
 
+#include <array>
 #include "common/bit_util.h"
 #include "video_core/command_classes/codecs/h264.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 
 namespace Tegra::Decoder {
+namespace {
+// ZigZag LUTs from libavcodec.
+constexpr std::array<u8, 64> zig_zag_direct{
+    0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,  12, 19, 26, 33, 40, 48,
+    41, 34, 27, 20, 13, 6,  7,  14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
+    30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
+};
+
+constexpr std::array<u8, 16> zig_zag_scan{
+    0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
+    1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
+};
+} // Anonymous namespace
+
 H264::H264(GPU& gpu_) : gpu(gpu_) {}
 
 H264::~H264() = default;
 
-std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) {
+const std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
+                                                bool is_first_frame) {
     H264DecoderContext context{};
     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
 
@@ -48,7 +64,8 @@ std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo
         writer.WriteU(0, 8);
         writer.WriteU(31, 8);
         writer.WriteUe(0);
-        const s32 chroma_format_idc = (context.h264_parameter_set.flags >> 12) & 0x3;
+        const auto chroma_format_idc =
+            static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
         writer.WriteUe(chroma_format_idc);
         if (chroma_format_idc == 3) {
             writer.WriteBit(false);
@@ -59,8 +76,8 @@ std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo
         writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
         writer.WriteBit(false); // Scaling matrix present flag
 
-        const s32 order_cnt_type = static_cast<s32>((context.h264_parameter_set.flags >> 14) & 3);
-        writer.WriteUe(static_cast<s32>((context.h264_parameter_set.flags >> 8) & 0xf));
+        const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
+        writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
         writer.WriteUe(order_cnt_type);
         if (order_cnt_type == 0) {
             writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
@@ -100,7 +117,7 @@ std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo
         writer.WriteUe(0);
         writer.WriteUe(0);
 
-        writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag);
+        writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
         writer.WriteBit(false);
         writer.WriteUe(0);
         writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
@@ -172,8 +189,8 @@ void H264BitWriter::WriteSe(s32 value) {
     WriteExpGolombCodedInt(value);
 }
 
-void H264BitWriter::WriteUe(s32 value) {
-    WriteExpGolombCodedUInt((u32)value);
+void H264BitWriter::WriteUe(u32 value) {
+    WriteExpGolombCodedUInt(value); // value is already u32, so it is forwarded without a cast
 }
 
 void H264BitWriter::End() {
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
index 21752dd90..273449495 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -38,7 +38,7 @@ public:
     /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax
     void WriteU(s32 value, s32 value_sz);
     void WriteSe(s32 value);
-    void WriteUe(s32 value);
+    void WriteUe(u32 value);
 
     /// Finalize the bitstream
     void End();
@@ -51,26 +51,14 @@ public:
     void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
 
     /// Return the bitstream as a vector.
-    std::vector<u8>& GetByteArray();
-    const std::vector<u8>& GetByteArray() const;
+    [[nodiscard]] std::vector<u8>& GetByteArray();
+    [[nodiscard]] const std::vector<u8>& GetByteArray() const;
 
 private:
-    // ZigZag LUTs from libavcodec.
-    static constexpr std::array<u8, 64> zig_zag_direct{
-        0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,  12, 19, 26, 33, 40, 48,
-        41, 34, 27, 20, 13, 6,  7,  14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
-        30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
-    };
-
-    static constexpr std::array<u8, 16> zig_zag_scan{
-        0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
-        1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
-    };
-
     void WriteBits(s32 value, s32 bit_count);
     void WriteExpGolombCodedInt(s32 value);
     void WriteExpGolombCodedUInt(u32 value);
-    s32 GetFreeBufferBits();
+    [[nodiscard]] s32 GetFreeBufferBits();
     void Flush();
 
     s32 buffer_size{8};
@@ -86,8 +74,8 @@ public:
     ~H264();
 
     /// Compose the H264 header of the frame for FFmpeg decoding
-    std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
-                                        bool is_first_frame = false);
+    [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
+                                                            bool is_first_frame = false);
 
 private:
     struct H264ParameterSet {
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index d205a8f5d..ab44fdc9e 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -9,7 +9,7 @@
 #include "video_core/memory_manager.h"
 
 namespace Tegra::Decoder {
-
+namespace {
 // Default compressed header probabilities once frame context resets
 constexpr Vp9EntropyProbs default_probs{
     .y_mode_prob{
@@ -170,6 +170,89 @@ constexpr Vp9EntropyProbs default_probs{
     .high_precision{128, 128},
 };
 
+constexpr std::array<s32, 256> norm_lut{
+    0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+constexpr std::array<s32, 254> map_lut{
+    20,  21,  22,  23,  24,  25,  0,   26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,
+    1,   38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  2,   50,  51,  52,  53,  54,
+    55,  56,  57,  58,  59,  60,  61,  3,   62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,
+    73,  4,   74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  5,   86,  87,  88,  89,
+    90,  91,  92,  93,  94,  95,  96,  97,  6,   98,  99,  100, 101, 102, 103, 104, 105, 106, 107,
+    108, 109, 7,   110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8,   122, 123, 124,
+    125, 126, 127, 128, 129, 130, 131, 132, 133, 9,   134, 135, 136, 137, 138, 139, 140, 141, 142,
+    143, 144, 145, 10,  146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11,  158, 159,
+    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12,  170, 171, 172, 173, 174, 175, 176, 177,
+    178, 179, 180, 181, 13,  182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14,  194,
+    195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15,  206, 207, 208, 209, 210, 211, 212,
+    213, 214, 215, 216, 217, 16,  218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17,
+    230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18,  242, 243, 244, 245, 246, 247,
+    248, 249, 250, 251, 252, 253, 19,
+};
+
+// 6.2.14 Tile size calculation
+
+[[nodiscard]] s32 CalcMinLog2TileCols(s32 frame_width) {
+    const s32 sb64_cols = (frame_width + 63) / 64; // ceil(frame_width / 64) for widths >= 0
+    s32 min_log2 = 0;
+    // Find the smallest shift for which (64 << shift) is no longer below sb64_cols.
+    while ((64 << min_log2) < sb64_cols) {
+        min_log2++;
+    }
+
+    return min_log2;
+}
+
+[[nodiscard]] s32 CalcMaxLog2TileCols(s32 frame_width) {
+    const s32 sb64_cols = (frame_width + 63) / 64; // ceil(frame_width / 64) for widths >= 0
+    s32 max_log2 = 1;
+    // Stop at the first shift that leaves fewer than 4 in sb64_cols >> shift.
+    while ((sb64_cols >> max_log2) >= 4) {
+        max_log2++;
+    }
+
+    return max_log2 - 1; // step back to the shift before the one that ended the loop
+}
+
+// Recenters probability. Based on section 6.3.6 of VP9 Specification
+[[nodiscard]] s32 RecenterNonNeg(s32 new_prob, s32 old_prob) {
+    if (new_prob > old_prob * 2) {
+        return new_prob; // more than twice old_prob: returned unchanged
+    }
+
+    if (new_prob >= old_prob) {
+        return (new_prob - old_prob) * 2; // at or above old_prob: twice the gap (even)
+    }
+
+    return (old_prob - new_prob) * 2 - 1; // below old_prob: odd value, one less than twice the gap
+}
+
+// Maps (new_prob, old_prob) to a map_lut entry. Based on section 6.3.5 of VP9 Specification
+[[nodiscard]] s32 RemapProbability(s32 new_prob, s32 old_prob) {
+    new_prob--; // both inputs are taken by value, so the decrements stay local
+    old_prob--;
+
+    std::size_t index{};
+    // std::max(0, ...) keeps the value non-negative before the cast to std::size_t.
+    if (old_prob * 2 <= 0xff) {
+        index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
+    } else { // otherwise recenter both values mirrored as 0xff - 1 - value
+        index = static_cast<std::size_t>(
+            std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
+    }
+
+    return map_lut[index]; // NOTE(review): index is not range-checked against map_lut here
+}
+} // Anonymous namespace
+
 VP9::VP9(GPU& gpu) : gpu(gpu) {}
 
 VP9::~VP9() = default;
@@ -207,32 +290,6 @@ void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_pro
     EncodeTermSubExp(writer, delta);
 }
 
-s32 VP9::RemapProbability(s32 new_prob, s32 old_prob) {
-    new_prob--;
-    old_prob--;
-
-    std::size_t index{};
-
-    if (old_prob * 2 <= 0xff) {
-        index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
-    } else {
-        index = static_cast<std::size_t>(
-            std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
-    }
-
-    return map_lut[index];
-}
-
-s32 VP9::RecenterNonNeg(s32 new_prob, s32 old_prob) {
-    if (new_prob > old_prob * 2) {
-        return new_prob;
-    } else if (new_prob >= old_prob) {
-        return (new_prob - old_prob) * 2;
-    } else {
-        return (old_prob - new_prob) * 2 - 1;
-    }
-}
-
 void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) {
     if (WriteLessThan(writer, value, 16)) {
         writer.Write(value, 4);
@@ -332,28 +389,6 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
     }
 }
 
-s32 VP9::CalcMinLog2TileCols(s32 frame_width) {
-    const s32 sb64_cols = (frame_width + 63) / 64;
-    s32 min_log2 = 0;
-
-    while ((64 << min_log2) < sb64_cols) {
-        min_log2++;
-    }
-
-    return min_log2;
-}
-
-s32 VP9::CalcMaxLog2TileCols(s32 frameWidth) {
-    const s32 sb64_cols = (frameWidth + 63) / 64;
-    s32 max_log2 = 1;
-
-    while ((sb64_cols >> max_log2) >= 4) {
-        max_log2++;
-    }
-
-    return max_log2 - 1;
-}
-
 Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
     PictureInfo picture_info{};
     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
@@ -379,14 +414,14 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state)
     Vp9FrameContainer frame{};
     {
         gpu.SyncGuestHost();
-        frame.info = std::move(GetVp9PictureInfo(state));
+        frame.info = GetVp9PictureInfo(state);
 
         frame.bit_stream.resize(frame.info.bitstream_size);
         gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(),
                                       frame.info.bitstream_size);
     }
     // Buffer two frames, saving the last show frame info
-    if (next_next_frame.bit_stream.size() != 0) {
+    if (!next_next_frame.bit_stream.empty()) {
         Vp9FrameContainer temp{
             .info = frame.info,
             .bit_stream = frame.bit_stream,
@@ -396,15 +431,15 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state)
         frame.bit_stream = next_next_frame.bit_stream;
         next_next_frame = std::move(temp);
 
-        if (next_frame.bit_stream.size() != 0) {
-            Vp9FrameContainer temp{
+        if (!next_frame.bit_stream.empty()) {
+            Vp9FrameContainer temp2{
                 .info = frame.info,
                 .bit_stream = frame.bit_stream,
             };
             next_frame.info.show_frame = frame.info.last_frame_shown;
             frame.info = next_frame.info;
             frame.bit_stream = next_frame.bit_stream;
-            next_frame = std::move(temp);
+            next_frame = std::move(temp2);
         } else {
             next_frame.info = frame.info;
             next_frame.bit_stream = frame.bit_stream;
@@ -605,12 +640,6 @@ std::vector<u8> VP9::ComposeCompressedHeader() {
 
     writer.End();
     return writer.GetBuffer();
-
-    const auto writer_bytearray = writer.GetBuffer();
-
-    std::vector<u8> compressed_header(writer_bytearray.size());
-    std::memcpy(compressed_header.data(), writer_bytearray.data(), writer_bytearray.size());
-    return compressed_header;
 }
 
 VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
@@ -648,7 +677,6 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
         current_frame_info.intra_only = true;
 
     } else {
-        std::array<s32, 3> ref_frame_index;
 
         if (!current_frame_info.show_frame) {
             uncomp_writer.WriteBit(current_frame_info.intra_only);
@@ -663,9 +691,9 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
         }
 
         // Last, Golden, Altref frames
-        ref_frame_index = std::array<s32, 3>{0, 1, 2};
+        std::array<s32, 3> ref_frame_index{0, 1, 2};
 
-        // set when next frame is hidden
+        // Set when next frame is hidden
         // altref and golden references are swapped
         if (swap_next_golden) {
             ref_frame_index = std::array<s32, 3>{0, 2, 1};
@@ -754,17 +782,19 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
         for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
             const s8 old_deltas = loop_filter_ref_deltas[index];
             const s8 new_deltas = current_frame_info.ref_deltas[index];
+            const bool differing_delta = old_deltas != new_deltas;
 
-            loop_filter_delta_update |=
-                (update_loop_filter_ref_deltas[index] = old_deltas != new_deltas);
+            update_loop_filter_ref_deltas[index] = differing_delta;
+            loop_filter_delta_update |= differing_delta;
         }
 
         for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
             const s8 old_deltas = loop_filter_mode_deltas[index];
             const s8 new_deltas = current_frame_info.mode_deltas[index];
+            const bool differing_delta = old_deltas != new_deltas;
 
-            loop_filter_delta_update |=
-                (update_loop_filter_mode_deltas[index] = old_deltas != new_deltas);
+            update_loop_filter_mode_deltas[index] = differing_delta;
+            loop_filter_delta_update |= differing_delta;
         }
 
         uncomp_writer.WriteBit(loop_filter_delta_update);
@@ -824,12 +854,12 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
     return uncomp_writer;
 }
 
-std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
+const std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
     std::vector<u8> bitstream;
     {
         Vp9FrameContainer curr_frame = GetCurrentFrame(state);
         current_frame_info = curr_frame.info;
-        bitstream = curr_frame.bit_stream;
+        bitstream = std::move(curr_frame.bit_stream);
     }
 
     // The uncompressed header routine sets PrevProb parameters needed for the compressed header
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h
index 748e11bae..e2504512c 100644
--- a/src/video_core/command_classes/codecs/vp9.h
+++ b/src/video_core/command_classes/codecs/vp9.h
@@ -4,9 +4,9 @@
 
 #pragma once
 
-#include <unordered_map>
+#include <array>
 #include <vector>
-#include "common/common_funcs.h"
+
 #include "common/common_types.h"
 #include "common/stream.h"
 #include "video_core/command_classes/codecs/vp9_types.h"
@@ -25,6 +25,12 @@ public:
     VpxRangeEncoder();
     ~VpxRangeEncoder();
 
+    VpxRangeEncoder(const VpxRangeEncoder&) = delete;
+    VpxRangeEncoder& operator=(const VpxRangeEncoder&) = delete;
+
+    VpxRangeEncoder(VpxRangeEncoder&&) = default;
+    VpxRangeEncoder& operator=(VpxRangeEncoder&&) = default;
+
     /// Writes the rightmost value_size bits from value into the stream
     void Write(s32 value, s32 value_size);
 
@@ -37,11 +43,11 @@ public:
     /// Signal the end of the bitstream
     void End();
 
-    std::vector<u8>& GetBuffer() {
+    [[nodiscard]] std::vector<u8>& GetBuffer() {
         return base_stream.GetBuffer();
     }
 
-    const std::vector<u8>& GetBuffer() const {
+    [[nodiscard]] const std::vector<u8>& GetBuffer() const {
         return base_stream.GetBuffer();
     }
 
@@ -52,17 +58,6 @@ private:
     u32 range{0xff};
     s32 count{-24};
     s32 half_probability{128};
-    static constexpr std::array<s32, 256> norm_lut{
-        0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-        3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-        2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    };
 };
 
 class VpxBitStreamWriter {
@@ -70,6 +65,12 @@ public:
     VpxBitStreamWriter();
     ~VpxBitStreamWriter();
 
+    VpxBitStreamWriter(const VpxBitStreamWriter&) = delete;
+    VpxBitStreamWriter& operator=(const VpxBitStreamWriter&) = delete;
+
+    VpxBitStreamWriter(VpxBitStreamWriter&&) = default;
+    VpxBitStreamWriter& operator=(VpxBitStreamWriter&&) = default;
+
     /// Write an unsigned integer value
     void WriteU(u32 value, u32 value_size);
 
@@ -86,10 +87,10 @@ public:
     void Flush();
 
     /// Returns byte_array
-    std::vector<u8>& GetByteArray();
+    [[nodiscard]] std::vector<u8>& GetByteArray();
 
     /// Returns const byte_array
-    const std::vector<u8>& GetByteArray() const;
+    [[nodiscard]] const std::vector<u8>& GetByteArray() const;
 
 private:
     /// Write bit_count bits from value into buffer
@@ -110,12 +111,18 @@ public:
     explicit VP9(GPU& gpu);
     ~VP9();
 
+    VP9(const VP9&) = delete;
+    VP9& operator=(const VP9&) = delete;
+
+    VP9(VP9&&) = default;
+    VP9& operator=(VP9&&) = delete;
+
     /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
     /// documentation
-    std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
+    [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
 
     /// Returns true if the most recent frame was a hidden frame.
-    bool WasFrameHidden() const {
+    [[nodiscard]] bool WasFrameHidden() const {
         return hidden;
     }
 
@@ -132,12 +139,6 @@ private:
     /// Generates compressed header probability deltas in the bitstream writer
     void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
 
-    /// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
-    s32 RemapProbability(s32 new_prob, s32 old_prob);
-
-    /// Recenters probability. Based on section 6.3.6 of VP9 Specification
-    s32 RecenterNonNeg(s32 new_prob, s32 old_prob);
-
     /// Inverse of 6.3.4 Decode term subexp
     void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value);
 
@@ -157,22 +158,18 @@ private:
     /// Write motion vector probability updates. 6.3.17 in the spec
     void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
 
-    /// 6.2.14 Tile size calculation
-    s32 CalcMinLog2TileCols(s32 frame_width);
-    s32 CalcMaxLog2TileCols(s32 frame_width);
-
     /// Returns VP9 information from NVDEC provided offset and size
-    Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
+    [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
 
     /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
     void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
 
     /// Returns frame to be decoded after buffering
-    Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
+    [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
 
     /// Use NVDEC providied information to compose the headers for the current frame
-    std::vector<u8> ComposeCompressedHeader();
-    VpxBitStreamWriter ComposeUncompressedHeader();
+    [[nodiscard]] std::vector<u8> ComposeCompressedHeader();
+    [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
 
     GPU& gpu;
     std::vector<u8> frame;
@@ -180,7 +177,7 @@ private:
     std::array<s8, 4> loop_filter_ref_deltas{};
     std::array<s8, 2> loop_filter_mode_deltas{};
 
-    bool hidden;
+    bool hidden = false;
     s64 current_frame_number = -2; // since we buffer 2 frames
     s32 grace_period = 6;          // frame offsets need to stabilize
     std::array<FrameContexts, 4> frame_ctxs{};
@@ -193,23 +190,6 @@ private:
 
     s32 diff_update_probability = 252;
     s32 frame_sync_code = 0x498342;
-    static constexpr std::array<s32, 254> map_lut = {
-        20,  21,  22,  23,  24,  25,  0,   26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
-        36,  37,  1,   38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  2,   50,
-        51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  3,   62,  63,  64,  65,  66,
-        67,  68,  69,  70,  71,  72,  73,  4,   74,  75,  76,  77,  78,  79,  80,  81,  82,
-        83,  84,  85,  5,   86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  6,
-        98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 7,   110, 111, 112, 113,
-        114, 115, 116, 117, 118, 119, 120, 121, 8,   122, 123, 124, 125, 126, 127, 128, 129,
-        130, 131, 132, 133, 9,   134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
-        10,  146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11,  158, 159, 160,
-        161, 162, 163, 164, 165, 166, 167, 168, 169, 12,  170, 171, 172, 173, 174, 175, 176,
-        177, 178, 179, 180, 181, 13,  182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192,
-        193, 14,  194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15,  206, 207,
-        208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 16,  218, 219, 220, 221, 222, 223,
-        224, 225, 226, 227, 228, 229, 17,  230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
-        240, 241, 18,  242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19,
-    };
 };
 
 } // namespace Decoder
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index 8688fdac0..4f0b05d22 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -4,13 +4,11 @@
 
 #pragma once
 
-#include <algorithm>
-#include <list>
+#include <array>
+#include <cstring>
 #include <vector>
-#include "common/cityhash.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
-#include "video_core/command_classes/nvdec_common.h"
 
 namespace Tegra {
 class GPU;
@@ -233,9 +231,8 @@ struct PictureInfo {
     u32 surface_params{};
     INSERT_PADDING_WORDS(3);
 
-    Vp9PictureInfo Convert() const {
-
-        return Vp9PictureInfo{
+    [[nodiscard]] Vp9PictureInfo Convert() const {
+        return {
             .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
             .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
             .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp
index a5234ee47..c4dd4881a 100644
--- a/src/video_core/command_classes/host1x.cpp
+++ b/src/video_core/command_classes/host1x.cpp
@@ -15,7 +15,7 @@ void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) {
     std::memcpy(state_offset, &arguments, sizeof(u32));
 }
 
-void Tegra::Host1x::ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments) {
+void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& arguments) {
     StateWrite(static_cast<u32>(method), arguments[0]);
     switch (method) {
     case Method::WaitSyncpt:
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h
index 501a5ed2e..013eaa0c1 100644
--- a/src/video_core/command_classes/host1x.h
+++ b/src/video_core/command_classes/host1x.h
@@ -61,7 +61,7 @@ public:
     ~Host1x();
 
     /// Writes the method into the state, Invoke Execute() if encountered
-    void ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments);
+    void ProcessMethod(Method method, const std::vector<u32>& arguments);
 
 private:
     /// For Host1x, execute is waiting on a syncpoint previously written into the state
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
index ede9466eb..8ca7a7b06 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -2,13 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <bitset>
 #include "common/assert.h"
-#include "common/bit_util.h"
-#include "core/memory.h"
 #include "video_core/command_classes/nvdec.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
 
 namespace Tegra {
 
@@ -16,7 +12,7 @@ Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
 
 Nvdec::~Nvdec() = default;
 
-void Nvdec::ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments) {
+void Nvdec::ProcessMethod(Method method, const std::vector<u32>& arguments) {
     if (method == Method::SetVideoCodec) {
         codec->StateWrite(static_cast<u32>(method), arguments[0]);
     } else {
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index c1a9d843e..eec4443f9 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -4,8 +4,8 @@
 
 #pragma once
 
+#include <memory>
 #include <vector>
-#include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/command_classes/codecs/codec.h"
 
@@ -23,17 +23,17 @@ public:
     ~Nvdec();
 
     /// Writes the method into the state, Invoke Execute() if encountered
-    void ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments);
+    void ProcessMethod(Method method, const std::vector<u32>& arguments);
 
     /// Return most recently decoded frame
-    AVFrame* GetFrame();
-    const AVFrame* GetFrame() const;
+    [[nodiscard]] AVFrame* GetFrame();
+    [[nodiscard]] const AVFrame* GetFrame() const;
 
 private:
     /// Invoke codec to decode a frame
     void Execute();
 
     GPU& gpu;
-    std::unique_ptr<Tegra::Codec> codec;
+    std::unique_ptr<Codec> codec;
 };
 } // namespace Tegra
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp
index a0ab44855..19dc9e0ab 100644
--- a/src/video_core/command_classes/sync_manager.cpp
+++ b/src/video_core/command_classes/sync_manager.cpp
@@ -27,22 +27,22 @@ SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
 SyncptIncrManager::~SyncptIncrManager() = default;
 
 void SyncptIncrManager::Increment(u32 id) {
-    increments.push_back(SyncptIncr{0, id, true});
+    increments.emplace_back(0, 0, id, true); // id_ = 0, class_id_ = 0, syncpt_id_ = id, done = true
     IncrementAllDone();
 }
 
 u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
     const u32 handle = current_id++;
-    increments.push_back(SyncptIncr{handle, class_id, id});
+    increments.emplace_back(handle, class_id, id); // done keeps its default of false
     return handle;
 }
 
 void SyncptIncrManager::SignalDone(u32 handle) {
-    auto done_incr = std::find_if(increments.begin(), increments.end(),
-                                  [handle](SyncptIncr incr) { return incr.id == handle; });
-    if (done_incr != increments.end()) {
-        const SyncptIncr incr = *done_incr;
-        *done_incr = SyncptIncr{incr.id, incr.class_id, incr.syncpt_id, true};
+    const auto done_incr =
+        std::find_if(increments.begin(), increments.end(),
+                     [handle](const SyncptIncr& incr) { return incr.id == handle; });
+    if (done_incr != increments.cend()) { // only when an entry with this handle exists
+        done_incr->complete = true; // update the found entry in place
     }
     IncrementAllDone();
 }
diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h
index 353b67573..2c321ec58 100644
--- a/src/video_core/command_classes/sync_manager.h
+++ b/src/video_core/command_classes/sync_manager.h
@@ -32,8 +32,8 @@ struct SyncptIncr {
     u32 syncpt_id;
     bool complete;
 
-    SyncptIncr(u32 id, u32 syncpt_id_, u32 class_id_, bool done = false)
-        : id(id), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
+    SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false)
+        : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
 };
 
 class SyncptIncrManager {
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 66e15a1a8..5b52da277 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -26,7 +26,7 @@ void Vic::VicStateWrite(u32 offset, u32 arguments) {
     std::memcpy(state_offset, &arguments, sizeof(u32));
 }
 
-void Vic::ProcessMethod(Vic::Method method, const std::vector<u32>& arguments) {
+void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
     LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method));
     VicStateWrite(static_cast<u32>(method), arguments[0]);
     const u64 arg = static_cast<u64>(arguments[0]) << 8;
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
index dd0a2aed8..8c4e284a1 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -63,11 +63,11 @@ public:
         SetOutputSurfaceChromaVOffset = 0x1ca
     };
 
-    explicit Vic(GPU& gpu, std::shared_ptr<Tegra::Nvdec> nvdec_processor);
+    explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
     ~Vic();
 
     /// Write to the device state.
-    void ProcessMethod(Vic::Method method, const std::vector<u32>& arguments);
+    void ProcessMethod(Method method, const std::vector<u32>& arguments);
 
 private:
     void Execute();
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index f2f96ac33..105b85a92 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/cityhash.h"
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/memory.h"
@@ -12,6 +13,20 @@
 
 namespace Tegra {
 
+// Stores one CityHash64 per command_lists entry, hashing the buffer as currently read via gpu.
+void CommandList::RefreshIntegrityChecks(GPU& gpu) {
+    const std::size_t num_lists = command_lists.size();
+    command_list_hashes.resize(num_lists);
+    for (std::size_t i = 0; i < num_lists; ++i) {
+        const CommandListHeader& header = command_lists[i];
+        const std::size_t byte_count = header.size * sizeof(u32);
+        std::vector<CommandHeader> words(header.size); // fresh buffer for every entry
+        gpu.MemoryManager().ReadBlockUnsafe(header.addr, words.data(), byte_count);
+        command_list_hashes[i] =
+            Common::CityHash64(reinterpret_cast<char*>(words.data()), byte_count);
+    }
+}
+
 DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
 
 DmaPusher::~DmaPusher() = default;
@@ -45,32 +60,58 @@ bool DmaPusher::Step() {
         return false;
     }
 
-    const CommandList& command_list{dma_pushbuffer.front()};
-    ASSERT_OR_EXECUTE(!command_list.empty(), {
-        // Somehow the command_list is empty, in order to avoid a crash
-        // We ignore it and assume its size is 0.
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-        return true;
-    });
-    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
-    const GPUVAddr dma_get = command_list_header.addr;
-
-    if (dma_pushbuffer_subindex >= command_list.size()) {
-        // We've gone through the current list, remove it from the queue
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-    }
+    CommandList& command_list{dma_pushbuffer.front()};
 
-    if (command_list_header.size == 0) {
-        return true;
-    }
+    ASSERT_OR_EXECUTE(
+        command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
+            // Somehow the command_list is empty, in order to avoid a crash
+            // We ignore it and assume its size is 0.
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+            return true;
+        });
 
-    // Push buffer non-empty, read a word
-    command_headers.resize(command_list_header.size);
-    gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
-                                        command_list_header.size * sizeof(u32));
+    if (command_list.prefetch_command_list.size()) {
+        // Prefetched command list from nvdrv, used for things like synchronization
+        command_headers = std::move(command_list.prefetch_command_list);
+        dma_pushbuffer.pop();
+    } else {
+        const CommandListHeader command_list_header{
+            command_list.command_lists[dma_pushbuffer_subindex]};
+        const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++];
+        const GPUVAddr dma_get = command_list_header.addr;
+
+        // Remember whether this was the last entry: command_list dangles once it is popped
+        const bool list_done = dma_pushbuffer_subindex >= command_list.command_lists.size();
+        if (list_done) {
+            // We've gone through the current list, remove it from the queue
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+        }
 
+        if (command_list_header.size == 0) {
+            return true;
+        }
+
+        // Push buffer non-empty, read a word
+        command_headers.resize(command_list_header.size);
+        gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+                                            command_list_header.size * sizeof(u32));
+
+        // Integrity check
+        const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+                                                command_list_header.size * sizeof(u32));
+        if (new_hash != next_hash) {
+            LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get);
+            if (!list_done) {
+                // Drop the rest of the corrupt list. If it was already retired above, popping
+                // again would discard the next queued list (or pop an empty queue).
+                dma_pushbuffer.pop();
+                dma_pushbuffer_subindex = 0;
+            }
+            return true;
+        }
+    }
     for (std::size_t index = 0; index < command_headers.size();) {
         const CommandHeader& command_header = command_headers[index];
 
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index efa90d170..8496ba2da 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -27,6 +27,31 @@ enum class SubmissionMode : u32 {
     IncreaseOnce = 5
 };
 
+// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
+// their numbers are written down multiplied by 4 in docs. Here we do not multiply by 4,
+// so the values you see in docs may be 4 times the values listed in this enum.
+enum class BufferMethods : u32 {
+    BindObject = 0x0,
+    Nop = 0x2,
+    SemaphoreAddressHigh = 0x4,
+    SemaphoreAddressLow = 0x5,
+    SemaphoreSequence = 0x6,
+    SemaphoreTrigger = 0x7,
+    NotifyIntr = 0x8,
+    WrcacheFlush = 0x9,
+    Unk28 = 0xA,
+    UnkCacheFlush = 0xB,
+    RefCnt = 0x14,
+    SemaphoreAcquire = 0x1A,
+    SemaphoreRelease = 0x1B,
+    FenceValue = 0x1C,
+    FenceAction = 0x1D,
+    WaitForInterrupt = 0x1E,
+    Unk7c = 0x1F,
+    Yield = 0x20,
+    NonPullerMethods = 0x40,
+};
+
 struct CommandListHeader {
     union {
         u64 raw;
@@ -49,9 +74,29 @@ union CommandHeader {
 static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
 static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
 
+static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count,
+                                                  SubmissionMode mode) {
+    CommandHeader result{}; // value-initialized before the individual fields are assigned
+    result.method.Assign(static_cast<u32>(method)); // enum value is stored as a raw u32
+    result.arg_count.Assign(arg_count);
+    result.mode.Assign(mode);
+    return result;
+}
+
 class GPU;
 
-using CommandList = std::vector<Tegra::CommandListHeader>;
+struct CommandList final {
+    CommandList() = default;
+    explicit CommandList(std::size_t size) : command_lists(size) {}
+    explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list_)
+        : prefetch_command_list{std::move(prefetch_command_list_)} {}
+
+    void RefreshIntegrityChecks(GPU& gpu); // recomputes command_list_hashes
+
+    std::vector<Tegra::CommandListHeader> command_lists;     // GPU-memory command buffers
+    std::vector<u64> command_list_hashes;                    // CityHash64 per command_lists entry
+    std::vector<Tegra::CommandHeader> prefetch_command_list; // commands supplied inline
+};
 
 /**
  * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
@@ -60,7 +105,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
  * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
  * details on this implementation.
  */
-class DmaPusher {
+class DmaPusher final {
 public:
     explicit DmaPusher(Core::System& system, GPU& gpu);
     ~DmaPusher();
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d374b73cf..a3c05d1b0 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1893,6 +1893,7 @@ public:
         ICMP_IMM,
         FCMP_RR,
         FCMP_RC,
+        FCMP_IMMR,
         MUFU,  // Multi-Function Operator
         RRO_C, // Range Reduction Operator
         RRO_R,
@@ -2205,6 +2206,7 @@ private:
             INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
             INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
             INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
+            INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
             INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 171f78183..ebd149c3a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -194,30 +194,6 @@ void GPU::SyncGuestHost() {
 void GPU::OnCommandListEnd() {
     renderer->Rasterizer().ReleaseFences();
 }
-// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
-// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
-// So the values you see in docs might be multiplied by 4.
-enum class BufferMethods {
-    BindObject = 0x0,
-    Nop = 0x2,
-    SemaphoreAddressHigh = 0x4,
-    SemaphoreAddressLow = 0x5,
-    SemaphoreSequence = 0x6,
-    SemaphoreTrigger = 0x7,
-    NotifyIntr = 0x8,
-    WrcacheFlush = 0x9,
-    Unk28 = 0xA,
-    UnkCacheFlush = 0xB,
-    RefCnt = 0x14,
-    SemaphoreAcquire = 0x1A,
-    SemaphoreRelease = 0x1B,
-    FenceValue = 0x1C,
-    FenceAction = 0x1D,
-    Unk78 = 0x1E,
-    Unk7c = 0x1F,
-    Yield = 0x20,
-    NonPullerMethods = 0x40,
-};
 
 enum class GpuSemaphoreOperation {
     AcquireEqual = 0x1,
@@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
     case BufferMethods::UnkCacheFlush:
     case BufferMethods::WrcacheFlush:
     case BufferMethods::FenceValue:
+        break;
     case BufferMethods::FenceAction:
+        ProcessFenceActionMethod();
+        break;
+    case BufferMethods::WaitForInterrupt:
+        ProcessWaitForInterruptMethod();
         break;
     case BufferMethods::SemaphoreTrigger: {
         ProcessSemaphoreTriggerMethod();
@@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
     }
 }
 
+// Runs the fence operation selected by regs.fence_action on the syncpoint it references.
+void GPU::ProcessFenceActionMethod() {
+    const FenceOperation operation = regs.fence_action.op.Value();
+    if (operation == FenceOperation::Acquire) {
+        // The value passed along with the syncpoint id comes from regs.fence_value
+        WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+    } else if (operation == FenceOperation::Increment) {
+        IncrementSyncPoint(regs.fence_action.syncpoint_id);
+    } else {
+        // Any other encoding is reported through the unimplemented-feature macro
+        UNIMPLEMENTED_MSG("Unimplemented operation {}", static_cast<u32>(operation));
+    }
+}
+
+void GPU::ProcessWaitForInterruptMethod() {
+    // TODO(bunnei): Implement me. For now the method is only logged and otherwise ignored.
+    LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
 void GPU::ProcessSemaphoreTriggerMethod() {
     const auto semaphoreOperationMask = 0xF;
     const auto op =
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b8c613b11..5444b49f3 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -263,6 +263,24 @@ public:
         return use_nvdec;
     }
 
+    enum class FenceOperation : u32 {
+        Acquire = 0,   // GPU::ProcessFenceActionMethod handles this by calling WaitFence
+        Increment = 1, // GPU::ProcessFenceActionMethod handles this by calling IncrementSyncPoint
+    };
+
+    union FenceAction {
+        u32 raw;                           // the whole register word
+        BitField<0, 1, FenceOperation> op; // operation selector (see FenceOperation)
+        BitField<8, 24, u32> syncpoint_id; // syncpoint the operation applies to
+
+        static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
+            FenceAction result{};
+            result.op.Assign(op);
+            result.syncpoint_id.Assign(syncpoint_id);
+            return {result.raw}; // the packed word is handed back as a CommandHeader
+        }
+    };
+
     struct Regs {
         static constexpr size_t NUM_REGS = 0x40;
 
@@ -291,10 +309,7 @@ public:
                 u32 semaphore_acquire;
                 u32 semaphore_release;
                 u32 fence_value;
-                union {
-                    BitField<4, 4, u32> operation;
-                    BitField<8, 8, u32> id;
-                } fence_action;
+                FenceAction fence_action;
                 INSERT_UNION_PADDING_WORDS(0xE2);
 
                 // Puller state
@@ -342,6 +357,8 @@ protected:
 
 private:
     void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessFenceActionMethod();
+    void ProcessWaitForInterruptMethod();
     void ProcessSemaphoreTriggerMethod();
     void ProcessSemaphoreRelease();
     void ProcessSemaphoreAcquire();
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index e1217ca83..f34ed6735 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -771,13 +771,18 @@ void VKDevice::CollectTelemetryParameters() {
     VkPhysicalDeviceDriverPropertiesKHR driver{
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
         .pNext = nullptr,
+        .driverID = {},
+        .driverName = {},
+        .driverInfo = {},
+        .conformanceVersion = {},
     };
 
-    VkPhysicalDeviceProperties2KHR properties{
+    VkPhysicalDeviceProperties2KHR device_properties{
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
         .pNext = &driver,
+        .properties = {},
     };
-    physical.GetProperties2KHR(properties);
+    physical.GetProperties2KHR(device_properties);
 
     driver_id = driver.driverID;
     vendor_name = driver.driverName;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 696eaeb5f..0e8f9c352 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -159,6 +159,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
         .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
+        .codeSize = 0,
     };
 
     std::vector<vk::ShaderModule> modules;
@@ -388,6 +389,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
         .logicOp = VK_LOGIC_OP_COPY,
         .attachmentCount = static_cast<u32>(num_attachments),
         .pAttachments = cb_attachments.data(),
+        .blendConstants = {},
     };
 
     std::vector dynamic_states{
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index aabd62c5c..39cc3b869 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -20,14 +20,15 @@ AsyncShaders::~AsyncShaders() {
 }
 
 void AsyncShaders::AllocateWorkers() {
-    // Max worker threads we should allow
-    constexpr u32 MAX_THREADS = 4;
-    // Deduce how many threads we can use
-    const u32 threads_used = std::thread::hardware_concurrency() / 4;
-    // Always allow at least 1 thread regardless of our settings
-    const auto max_worker_count = std::max(1U, threads_used);
-    // Don't use more than MAX_THREADS
-    const auto num_workers = std::min(max_worker_count, MAX_THREADS);
+    // Use at least one thread
+    u32 num_workers = 1;
+
+    // Deduce how many more threads we can use
+    const u32 thread_count = std::thread::hardware_concurrency();
+    if (thread_count >= 8) {
+        // Increase async workers by 1 for every 2 threads >= 8
+        num_workers += 1 + (thread_count - 8) / 2;
+    }
 
     // If we already have workers queued, ignore
     if (num_workers == worker_threads.size()) {
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 4db329fa5..afef5948d 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -137,7 +137,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::FCMP_RR:
-    case OpCode::Id::FCMP_RC: {
+    case OpCode::Id::FCMP_RC:
+    case OpCode::Id::FCMP_IMMR: {
         UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
         Node op_c = GetRegister(instr.gpr39);
         Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index e8515321b..13dd16356 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -240,6 +240,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
         .is_tiled = is_tiled,
         .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
                            config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
+        .is_layered = false,
         .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
         .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
         .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,