1 files changed, 73 insertions, 41 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 713c14182..d8801b1f5 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/cityhash.h"
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/memory.h"
@@ -12,7 +13,7 @@
 
 namespace Tegra {
 
-DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {}
+DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
 
 DmaPusher::~DmaPusher() = default;
 
@@ -21,17 +22,22 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
 void DmaPusher::DispatchCalls() {
     MICROPROFILE_SCOPE(DispatchCalls);
 
+    gpu.SyncGuestHost();
     // On entering GPU code, assume all memory may be touched by the ARM core.
     gpu.Maxwell3D().OnMemoryWrite();
 
     dma_pushbuffer_subindex = 0;
 
-    while (Core::System::GetInstance().IsPoweredOn()) {
+    dma_state.is_last_call = true;
+
+    while (system.IsPoweredOn()) {
         if (!Step()) {
             break;
         }
     }
     gpu.FlushCommands();
+    gpu.SyncGuestHost();
+    gpu.OnCommandListEnd();
 }
 
 bool DmaPusher::Step() {
@@ -40,44 +46,59 @@ bool DmaPusher::Step() {
         return false;
     }
 
-    const CommandList& command_list{dma_pushbuffer.front()};
-    ASSERT_OR_EXECUTE(!command_list.empty(), {
-        // Somehow the command_list is empty, in order to avoid a crash
-        // We ignore it and assume its size is 0.
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-        return true;
-    });
-    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
-    GPUVAddr dma_get = command_list_header.addr;
-    GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
-    bool non_main = command_list_header.is_non_main;
-
-    if (dma_pushbuffer_subindex >= command_list.size()) {
-        // We've gone through the current list, remove it from the queue
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-    }
+    CommandList& command_list{dma_pushbuffer.front()};
 
-    if (command_list_header.size == 0) {
-        return true;
-    }
+    ASSERT_OR_EXECUTE(
+        command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
+            // The command_list is unexpectedly empty; to avoid a crash,
+            // drop it from the queue and treat it as having size 0.
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+            return true;
+        });
 
-    // Push buffer non-empty, read a word
-    command_headers.resize(command_list_header.size);
-    gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
-                                        command_list_header.size * sizeof(u32));
+    if (command_list.prefetch_command_list.size()) {
+        // Prefetched command list from nvdrv, used for things like synchronization
+        command_headers = std::move(command_list.prefetch_command_list);
+        dma_pushbuffer.pop();
+    } else {
+        const CommandListHeader command_list_header{
+            command_list.command_lists[dma_pushbuffer_subindex++]};
+        const GPUVAddr dma_get = command_list_header.addr;
+
+        if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
+            // We've gone through the current list, remove it from the queue
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+        }
 
-    for (const CommandHeader& command_header : command_headers) {
+        if (command_list_header.size == 0) {
+            return true;
+        }
 
-        // now, see if we're in the middle of a command
-        if (dma_state.length_pending) {
-            // Second word of long non-inc methods command - method count
-            dma_state.length_pending = 0;
-            dma_state.method_count = command_header.method_count_;
-        } else if (dma_state.method_count) {
+        // Push buffer non-empty, read all of its words into command_headers
+        command_headers.resize(command_list_header.size);
+        gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+                                            command_list_header.size * sizeof(u32));
+    }
+    for (std::size_t index = 0; index < command_headers.size();) {
+        const CommandHeader& command_header = command_headers[index];
+
+        if (dma_state.method_count) {
             // Data word of methods command
-            CallMethod(command_header.argument);
+            if (dma_state.non_incrementing) {
+                const u32 max_write = static_cast<u32>(
+                    std::min<std::size_t>(index + dma_state.method_count, command_headers.size()) -
+                    index);
+                CallMultiMethod(&command_header.argument, max_write);
+                dma_state.method_count -= max_write;
+                dma_state.is_last_call = true;
+                index += max_write;
+                continue;
+            } else {
+                dma_state.is_last_call = dma_state.method_count <= 1;
+                CallMethod(command_header.argument);
+            }
 
             if (!dma_state.non_incrementing) {
                 dma_state.method++;
@@ -117,11 +138,7 @@ bool DmaPusher::Step() {
                 break;
             }
         }
-    }
-
-    if (!non_main) {
-        // TODO (degasus): This is dead code, as dma_mget is never read.
-        dma_mget = dma_put;
+        index++;
     }
 
     return true;
@@ -134,7 +151,22 @@ void DmaPusher::SetState(const CommandHeader& command_header) {
 }
 
 void DmaPusher::CallMethod(u32 argument) const {
-    gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count});
+    if (dma_state.method < non_puller_methods) {
+        gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count});
+    } else {
+        subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument,
+                                                      dma_state.is_last_call);
+    }
+}
+
+void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
+    if (dma_state.method < non_puller_methods) {
+        gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
+                            dma_state.method_count);
+    } else {
+        subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
+                                                           num_methods, dma_state.method_count);
+    }
 }
 
 } // namespace Tegra