From 3cbe352c18f69596d91c4862382d61a3d6515140 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 1 Nov 2021 18:53:32 +0100
Subject: NVDRV: Refactor and add new NvMap.

---
 src/common/bit_field.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'src/common')
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 7e1df62b1..368b7b98c 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -127,11 +127,14 @@ public:
         }
     }
 
-    // This constructor and assignment operator might be considered ambiguous:
-    // Would they initialize the storage or just the bitfield?
-    // Hence, delete them. Use the Assign method to set bitfield values!
-    BitField(T val) = delete;
-    BitField& operator=(T val) = delete;
+    BitField(T val) {
+        Assign(val);
+    }
+
+    BitField& operator=(T val) {
+        Assign(val);
+        return *this;
+    }
 
     constexpr BitField() noexcept = default;
 
-- 
cgit v1.2.3


From b617874724c461cba270a00c0f8e67fc4a6d553a Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 10 Nov 2021 17:37:17 +0100
Subject: Common: implement MultiLevelPageTable.

---
 src/common/CMakeLists.txt             |  2 +
 src/common/multi_level_page_table.cpp |  7 +++
 src/common/multi_level_page_table.h   | 79 +++++++++++++++++++++++++++++++++
 src/common/multi_level_page_table.inc | 83 +++++++++++++++++++++++++++++++++++
 4 files changed, 171 insertions(+)
 create mode 100644 src/common/multi_level_page_table.cpp
 create mode 100644 src/common/multi_level_page_table.h
 create mode 100644 src/common/multi_level_page_table.inc

(limited to 'src/common')

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 3447fabd8..2db414819 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -81,6 +81,8 @@ add_library(common STATIC
     microprofile.cpp
     microprofile.h
     microprofileui.h
+    multi_level_page_table.cpp
+    multi_level_page_table.h
     nvidia_flags.cpp
     nvidia_flags.h
     page_table.cpp
diff --git a/src/common/multi_level_page_table.cpp b/src/common/multi_level_page_table.cpp
new file mode 100644
index 000000000..561785ca7
--- /dev/null
+++ b/src/common/multi_level_page_table.cpp
@@ -0,0 +1,7 @@
+#include "common/multi_level_page_table.inc"
+
+namespace Common {
+template class Common::MultiLevelPageTable<GPUVAddr>;
+template class Common::MultiLevelPageTable<VAddr>;
+template class Common::MultiLevelPageTable<PAddr>;
+} // namespace Common
diff --git a/src/common/multi_level_page_table.h b/src/common/multi_level_page_table.h
new file mode 100644
index 000000000..dde1cc962
--- /dev/null
+++ b/src/common/multi_level_page_table.h
@@ -0,0 +1,115 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Common {
+
+/**
+ * Table of BaseAddr entries kept in a single contiguous virtual memory reservation.
+ *
+ * The reservation is split into first-level chunks. A chunk only becomes readable and
+ * writable once ReserveRange() has covered it, so entries outside reserved ranges must not
+ * be accessed through operator[] or data().
+ */
+template <typename BaseAddr>
+class MultiLevelPageTable final {
+public:
+    /// Creates an empty table that owns no memory.
+    constexpr MultiLevelPageTable() = default;
+
+    /**
+     * Reserves address space for one entry per page; no chunk is committed yet.
+     * @param address_space_bits Width of the covered address space, in bits.
+     * @param first_level_bits Number of top address bits selecting a first-level chunk.
+     * @param page_bits Size of a page, in bits.
+     */
+    explicit MultiLevelPageTable(std::size_t address_space_bits, std::size_t first_level_bits,
+                                 std::size_t page_bits);
+
+    /// Releases the reservation, if this object owns one.
+    ~MultiLevelPageTable() noexcept;
+
+    MultiLevelPageTable(const MultiLevelPageTable&) = delete;
+    MultiLevelPageTable& operator=(const MultiLevelPageTable&) = delete;
+
+    /// Takes over the reservation owned by `other` and leaves `other` empty.
+    MultiLevelPageTable(MultiLevelPageTable&& other) noexcept
+        : address_space_bits{std::exchange(other.address_space_bits, 0)},
+          first_level_bits{std::exchange(other.first_level_bits, 0)},
+          page_bits{std::exchange(other.page_bits, 0)},
+          first_level_shift{std::exchange(other.first_level_shift, 0)},
+          first_level_chunk_size{std::exchange(other.first_level_chunk_size, 0)},
+          // alloc_size has to travel with base_ptr: the destructor needs it to unmap the
+          // reservation on non-Windows hosts.
+          alloc_size{std::exchange(other.alloc_size, 0)},
+          first_level_map{std::move(other.first_level_map)},
+          base_ptr{std::exchange(other.base_ptr, nullptr)} {}
+
+    /**
+     * Replaces this table with the one owned by `other` and leaves `other` empty.
+     * The reservation previously owned by this object is released rather than leaked.
+     */
+    MultiLevelPageTable& operator=(MultiLevelPageTable&& other) noexcept {
+        // Detach the incoming state first; this also makes self-assignment harmless.
+        MultiLevelPageTable incoming{std::move(other)};
+        std::swap(address_space_bits, incoming.address_space_bits);
+        std::swap(first_level_bits, incoming.first_level_bits);
+        std::swap(page_bits, incoming.page_bits);
+        std::swap(first_level_shift, incoming.first_level_shift);
+        std::swap(first_level_chunk_size, incoming.first_level_chunk_size);
+        std::swap(alloc_size, incoming.alloc_size);
+        first_level_map.swap(incoming.first_level_map);
+        std::swap(base_ptr, incoming.base_ptr);
+        // `incoming` now holds the old reservation and frees it when it goes out of scope.
+        return *this;
+    }
+
+    /**
+     * Makes the first-level chunks touched by a range of the address space accessible.
+     * @param start First address of the range.
+     * @param size Length of the range.
+     */
+    void ReserveRange(u64 start, std::size_t size);
+
+    /// Returns the entry at `index`; no bounds or residency check is performed.
+    [[nodiscard]] constexpr const BaseAddr& operator[](std::size_t index) const {
+        return base_ptr[index];
+    }
+
+    /// Returns the entry at `index`; no bounds or residency check is performed.
+    [[nodiscard]] constexpr BaseAddr& operator[](std::size_t index) {
+        return base_ptr[index];
+    }
+
+    /// Returns the start of the entry storage, or nullptr for an empty table.
+    [[nodiscard]] constexpr BaseAddr* data() {
+        return base_ptr;
+    }
+
+    /// Returns the start of the entry storage, or nullptr for an empty table.
+    [[nodiscard]] constexpr const BaseAddr* data() const {
+        return base_ptr;
+    }
+
+private:
+    /// Commits the backing memory of first-level chunk `level`.
+    void AllocateLevel(u64 level);
+
+    std::size_t address_space_bits{};
+    std::size_t first_level_bits{};
+    std::size_t page_bits{};
+    std::size_t first_level_shift{};      ///< address_space_bits - first_level_bits
+    std::size_t first_level_chunk_size{}; ///< Size of one first-level chunk
+    std::size_t alloc_size{};             ///< Size in bytes of the whole reservation
+    std::vector<void*> first_level_map{}; ///< Per-chunk pointer, nullptr until allocated
+    BaseAddr* base_ptr{};                 ///< Start of the reservation, nullptr if none
+};
+
+} // namespace Common
diff --git a/src/common/multi_level_page_table.inc b/src/common/multi_level_page_table.inc
new file mode 100644
index 000000000..a75e61f9d
--- /dev/null
+++ b/src/common/multi_level_page_table.inc
@@ -0,0 +1,83 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/mman.h>
+#endif
+
+#include "common/assert.h"
+#include "common/multi_level_page_table.h"
+
+namespace Common {
+
+template <typename BaseAddr>
+MultiLevelPageTable<BaseAddr>::MultiLevelPageTable(std::size_t address_space_bits_,
+                                                   std::size_t first_level_bits_,
+                                                   std::size_t page_bits_)
+    : address_space_bits{address_space_bits_},
+      first_level_bits{first_level_bits_}, page_bits{page_bits_} {
+    first_level_shift = address_space_bits - first_level_bits;
+    first_level_chunk_size = 1ULL << (first_level_shift - page_bits);
+    alloc_size = (1ULL << (address_space_bits - page_bits)) * sizeof(BaseAddr);
+    std::size_t first_level_size = 1ULL << first_level_bits;
+    first_level_map.resize(first_level_size, nullptr);
+#ifdef _WIN32
+    void* base{VirtualAlloc(nullptr, alloc_size, MEM_RESERVE, PAGE_READWRITE)};
+#else
+    void* base{mmap(nullptr, alloc_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)};
+
+    if (base == MAP_FAILED) {
+        base = nullptr;
+    }
+#endif
+
+    ASSERT(base);
+    base_ptr = reinterpret_cast<BaseAddr*>(base);
+}
+
+template <typename BaseAddr>
+MultiLevelPageTable<BaseAddr>::~MultiLevelPageTable() noexcept {
+    if (!base_ptr) {
+        return;
+    }
+#ifdef _WIN32
+    ASSERT(VirtualFree(base_ptr, 0, MEM_RELEASE));
+#else
+    ASSERT(munmap(base_ptr, alloc_size) == 0);
+#endif
+}
+
+template <typename BaseAddr>
+void MultiLevelPageTable<BaseAddr>::ReserveRange(u64 start, std::size_t size) {
+    const u64 new_start = start >> first_level_shift;
+    const u64 new_end =
+        (start + size + (first_level_chunk_size << page_bits) - 1) >> first_level_shift;
+    for (u64 i = new_start; i <= new_end; i++) {
+        if (!first_level_map[i]) {
+            AllocateLevel(i);
+        }
+    }
+}
+
+template <typename BaseAddr>
+void MultiLevelPageTable<BaseAddr>::AllocateLevel(u64 level) {
+    void* ptr = reinterpret_cast<char*>(base_ptr) + level * first_level_chunk_size;
+#ifdef _WIN32
+    void* base{VirtualAlloc(ptr, first_level_chunk_size, MEM_COMMIT, PAGE_READWRITE)};
+#else
+    void* base{mmap(ptr, first_level_chunk_size, PROT_READ | PROT_WRITE,
+                    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)};
+
+    if (base == MAP_FAILED) {
+        base = nullptr;
+    }
+#endif
+    ASSERT(base);
+
+    first_level_map[level] = base;
+}
+
+} // namespace Common
-- 
cgit v1.2.3


From cbaf3fb433a351f7d9509f17f88d4896ba66afd1 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 11 Nov 2021 21:24:40 +0100
Subject: VideoCore: Update MemoryManager

---
 src/common/multi_level_page_table.cpp |   1 +
 src/common/multi_level_page_table.inc |   7 +-
 src/video_core/memory_manager.cpp     | 147 +++++++++++++---------------------
 src/video_core/memory_manager.h       |  98 +++++++----------------
 4 files changed, 86 insertions(+), 167 deletions(-)

(limited to 'src/common')

diff --git a/src/common/multi_level_page_table.cpp b/src/common/multi_level_page_table.cpp
index 561785ca7..aed04d0b5 100644
--- a/src/common/multi_level_page_table.cpp
+++ b/src/common/multi_level_page_table.cpp
@@ -4,4 +4,5 @@ namespace Common {
 template class Common::MultiLevelPageTable<GPUVAddr>;
 template class Common::MultiLevelPageTable<VAddr>;
 template class Common::MultiLevelPageTable<PAddr>;
+template class Common::MultiLevelPageTable<u32>;
 } // namespace Common
diff --git a/src/common/multi_level_page_table.inc b/src/common/multi_level_page_table.inc
index a75e61f9d..7fbcb908a 100644
--- a/src/common/multi_level_page_table.inc
+++ b/src/common/multi_level_page_table.inc
@@ -20,7 +20,7 @@ MultiLevelPageTable<BaseAddr>::MultiLevelPageTable(std::size_t address_space_bit
     : address_space_bits{address_space_bits_},
       first_level_bits{first_level_bits_}, page_bits{page_bits_} {
     first_level_shift = address_space_bits - first_level_bits;
-    first_level_chunk_size = 1ULL << (first_level_shift - page_bits);
+    first_level_chunk_size = (1ULL << (first_level_shift - page_bits)) * sizeof(BaseAddr);
     alloc_size = (1ULL << (address_space_bits - page_bits)) * sizeof(BaseAddr);
     std::size_t first_level_size = 1ULL << first_level_bits;
     first_level_map.resize(first_level_size, nullptr);
@@ -53,8 +53,7 @@ MultiLevelPageTable<BaseAddr>::~MultiLevelPageTable() noexcept {
 template <typename BaseAddr>
 void MultiLevelPageTable<BaseAddr>::ReserveRange(u64 start, std::size_t size) {
     const u64 new_start = start >> first_level_shift;
-    const u64 new_end =
-        (start + size + (first_level_chunk_size << page_bits) - 1) >> first_level_shift;
+    const u64 new_end = (start + size) >> first_level_shift;
     for (u64 i = new_start; i <= new_end; i++) {
         if (!first_level_map[i]) {
             AllocateLevel(i);
@@ -64,7 +63,7 @@ void MultiLevelPageTable<BaseAddr>::ReserveRange(u64 start, std::size_t size) {
 
 template <typename BaseAddr>
 void MultiLevelPageTable<BaseAddr>::AllocateLevel(u64 level) {
-    void* ptr = reinterpret_cast<char*>(base_ptr) + level * first_level_chunk_size;
+    void* ptr = reinterpret_cast<char *>(base_ptr) + level * first_level_chunk_size;
 #ifdef _WIN32
     void* base{VirtualAlloc(ptr, first_level_chunk_size, MEM_COMMIT, PAGE_READWRITE)};
 #else
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index a3efd365e..1e090279f 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -16,36 +16,63 @@
 
 namespace Tegra {
 
-MemoryManager::MemoryManager(Core::System& system_)
-    : system{system_}, page_table(page_table_size) {}
+MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 page_bits_)
+    : system{system_}, address_space_bits{address_space_bits_}, page_bits{page_bits_}, entries{},
+      page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits} {
+    address_space_size = 1ULL << address_space_bits;
+    allocate_start = address_space_bits > 32 ? 1ULL << 32 : 0;
+    page_size = 1ULL << page_bits;
+    page_mask = page_size - 1ULL;
+    const u64 page_table_bits = address_space_bits - cpu_page_bits;
+    const u64 page_table_size = 1ULL << page_table_bits;
+    page_table_mask = page_table_size - 1;
+
+    entries.resize(page_table_size / 32, 0);
+}
 
 MemoryManager::~MemoryManager() = default;
 
-void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
-    rasterizer = rasterizer_;
+MemoryManager::EntryType MemoryManager::GetEntry(size_t position) const {
+    position = position >> page_bits;
+    const u64 entry_mask = entries[position / 32];
+    const size_t sub_index = position % 32;
+    return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
+}
+
+void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
+    position = position >> page_bits;
+    const u64 entry_mask = entries[position / 32];
+    const size_t sub_index = position % 32;
+    entries[position / 32] =
+        (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
 }
 
-GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
+template <MemoryManager::EntryType entry_type>
+GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
+                                    size_t size) {
     u64 remaining_size{size};
+    if constexpr (entry_type == EntryType::Mapped) {
+        page_table.ReserveRange(gpu_addr, size);
+    }
     for (u64 offset{}; offset < size; offset += page_size) {
-        if (remaining_size < page_size) {
-            SetPageEntry(gpu_addr + offset, page_entry + offset, remaining_size);
-        } else {
-            SetPageEntry(gpu_addr + offset, page_entry + offset);
+        const GPUVAddr current_gpu_addr = gpu_addr + offset;
+        SetEntry(current_gpu_addr, entry_type);
+        if constexpr (entry_type == EntryType::Mapped) {
+            const VAddr current_cpu_addr = cpu_addr + offset;
+            const auto index = PageEntryIndex(current_gpu_addr);
+            page_table[index] = static_cast<u32>(current_cpu_addr >> 12ULL);
         }
         remaining_size -= page_size;
     }
     return gpu_addr;
 }
 
+void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
+}
+
 GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
-    const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
-    if (it != map_ranges.end() && it->first == gpu_addr) {
-        it->second = size;
-    } else {
-        map_ranges.insert(it, MapRange{gpu_addr, size});
-    }
-    return UpdateRange(gpu_addr, cpu_addr, size);
+    return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
 }
 
 GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) {
@@ -62,13 +89,6 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     if (size == 0) {
         return;
     }
-    const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
-    if (it != map_ranges.end()) {
-        ASSERT(it->first == gpu_addr);
-        map_ranges.erase(it);
-    } else {
-        ASSERT_MSG(false, "Unmapping non-existent GPU address=0x{:x}", gpu_addr);
-    }
     const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
 
     for (const auto& [map_addr, map_size] : submapped_ranges) {
@@ -79,63 +99,23 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
         rasterizer->UnmapMemory(*cpu_addr, map_size);
     }
 
-    UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
+    PageTableOp<EntryType::Free>(gpu_addr, 0, size);
 }
 
 std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) {
     for (u64 offset{}; offset < size; offset += page_size) {
-        if (!GetPageEntry(gpu_addr + offset).IsUnmapped()) {
+        if (GetEntry(gpu_addr + offset) != EntryType::Free) {
             return std::nullopt;
         }
     }
 
-    return UpdateRange(gpu_addr, PageEntry::State::Allocated, size);
+    return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
 }
 
 GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) {
     return *AllocateFixed(*FindFreeRange(size, align), size);
 }
 
-void MemoryManager::TryLockPage(PageEntry page_entry, std::size_t size) {
-    if (!page_entry.IsValid()) {
-        return;
-    }
-
-    ASSERT(system.CurrentProcess()
-               ->PageTable()
-               .LockForDeviceAddressSpace(page_entry.ToAddress(), size)
-               .IsSuccess());
-}
-
-void MemoryManager::TryUnlockPage(PageEntry page_entry, std::size_t size) {
-    if (!page_entry.IsValid()) {
-        return;
-    }
-
-    ASSERT(system.CurrentProcess()
-               ->PageTable()
-               .UnlockForDeviceAddressSpace(page_entry.ToAddress(), size)
-               .IsSuccess());
-}
-
-PageEntry MemoryManager::GetPageEntry(GPUVAddr gpu_addr) const {
-    return page_table[PageEntryIndex(gpu_addr)];
-}
-
-void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
-    // TODO(bunnei): We should lock/unlock device regions. This currently causes issues due to
-    // improper tracking, but should be fixed in the future.
-
-    //// Unlock the old page
-    // TryUnlockPage(page_table[PageEntryIndex(gpu_addr)], size);
-
-    //// Lock the new page
-    // TryLockPage(page_entry, size);
-    auto& current_page = page_table[PageEntryIndex(gpu_addr)];
-
-    current_page = page_entry;
-}
-
 std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
                                                      bool start_32bit_address) const {
     if (!align) {
@@ -145,9 +125,9 @@ std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size
     }
 
     u64 available_size{};
-    GPUVAddr gpu_addr{start_32bit_address ? address_space_start_low : address_space_start};
+    GPUVAddr gpu_addr{allocate_start};
     while (gpu_addr + available_size < address_space_size) {
-        if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) {
+        if (GetEntry(gpu_addr + available_size) == EntryType::Free) {
             available_size += page_size;
 
             if (available_size >= size) {
@@ -168,15 +148,12 @@ std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size
 }
 
 std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
-    if (gpu_addr == 0) {
-        return std::nullopt;
-    }
-    const auto page_entry{GetPageEntry(gpu_addr)};
-    if (!page_entry.IsValid()) {
+    if (GetEntry(gpu_addr) != EntryType::Mapped) {
         return std::nullopt;
     }
 
-    return page_entry.ToAddress() + (gpu_addr & page_mask);
+    const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex(gpu_addr)]) << 12ULL;
+    return cpu_addr_base + (gpu_addr & page_mask);
 }
 
 std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
@@ -227,10 +204,6 @@ template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
 template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
 
 u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) {
-    if (!GetPageEntry(gpu_addr).IsValid()) {
-        return {};
-    }
-
     const auto address{GpuToCpuAddress(gpu_addr)};
     if (!address) {
         return {};
@@ -240,10 +213,6 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) {
 }
 
 const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
-    if (!GetPageEntry(gpu_addr).IsValid()) {
-        return {};
-    }
-
     const auto address{GpuToCpuAddress(gpu_addr)};
     if (!address) {
         return {};
@@ -252,12 +221,6 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
     return system.Memory().GetPointer(*address);
 }
 
-size_t MemoryManager::BytesToMapEnd(GPUVAddr gpu_addr) const noexcept {
-    auto it = std::ranges::upper_bound(map_ranges, gpu_addr, {}, &MapRange::first);
-    --it;
-    return it->second - (gpu_addr - it->first);
-}
-
 void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
                                   bool is_safe) const {
     std::size_t remaining_size{size};
@@ -268,7 +231,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
         const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (page_addr && *page_addr != 0) {
+        if (page_addr) {
             const auto src_addr{*page_addr + page_offset};
             if (is_safe) {
                 // Flush must happen on the rasterizer interface, such that memory is always
@@ -307,7 +270,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
         const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (page_addr && *page_addr != 0) {
+        if (page_addr) {
             const auto dest_addr{*page_addr + page_offset};
 
             if (is_safe) {
@@ -392,7 +355,7 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
     size_t page_index{gpu_addr >> page_bits};
     const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
     while (page_index < page_last) {
-        if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) {
+        if (GetEntry(page_index << page_bits) == EntryType::Free) {
             return false;
         }
         ++page_index;
@@ -408,7 +371,7 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
     size_t page_offset{gpu_addr & page_mask};
     std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
     std::optional<VAddr> old_page_addr{};
-    const auto extend_size = [&last_segment, &page_index, &page_offset](std::size_t bytes) {
+    const auto extend_size = [this, &last_segment, &page_index, &page_offset](std::size_t bytes) {
         if (!last_segment) {
             const GPUVAddr new_base_addr = (page_index << page_bits) + page_offset;
             last_segment = {new_base_addr, bytes};
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 74f9ce175..0a763fd19 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/multi_level_page_table.h"
 
 namespace VideoCore {
 class RasterizerInterface;
@@ -19,55 +20,10 @@ class System;
 
 namespace Tegra {
 
-class PageEntry final {
-public:
-    enum class State : u32 {
-        Unmapped = static_cast<u32>(-1),
-        Allocated = static_cast<u32>(-2),
-    };
-
-    constexpr PageEntry() = default;
-    constexpr PageEntry(State state_) : state{state_} {}
-    constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {}
-
-    [[nodiscard]] constexpr bool IsUnmapped() const {
-        return state == State::Unmapped;
-    }
-
-    [[nodiscard]] constexpr bool IsAllocated() const {
-        return state == State::Allocated;
-    }
-
-    [[nodiscard]] constexpr bool IsValid() const {
-        return !IsUnmapped() && !IsAllocated();
-    }
-
-    [[nodiscard]] constexpr VAddr ToAddress() const {
-        if (!IsValid()) {
-            return {};
-        }
-
-        return static_cast<VAddr>(state) << ShiftBits;
-    }
-
-    [[nodiscard]] constexpr PageEntry operator+(u64 offset) const {
-        // If this is a reserved value, offsets do not apply
-        if (!IsValid()) {
-            return *this;
-        }
-        return PageEntry{(static_cast<VAddr>(state) << ShiftBits) + offset};
-    }
-
-private:
-    static constexpr std::size_t ShiftBits{12};
-
-    State state{State::Unmapped};
-};
-static_assert(sizeof(PageEntry) == 4, "PageEntry is too large");
-
 class MemoryManager final {
 public:
-    explicit MemoryManager(Core::System& system_);
+    explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40,
+                           u64 page_bits_ = 16);
     ~MemoryManager();
 
     /// Binds a renderer to the memory manager.
@@ -86,9 +42,6 @@ public:
     [[nodiscard]] u8* GetPointer(GPUVAddr addr);
     [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const;
 
-    /// Returns the number of bytes until the end of the memory map containing the given GPU address
-    [[nodiscard]] size_t BytesToMapEnd(GPUVAddr gpu_addr) const noexcept;
-
     /**
      * ReadBlock and WriteBlock are full read and write operations over virtual
      * GPU Memory. It's important to use these when GPU memory may not be continuous
@@ -145,44 +98,47 @@ public:
     void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
 
 private:
-    [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
-    void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
-    GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
     [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
                                                         bool start_32bit_address = false) const;
 
-    void TryLockPage(PageEntry page_entry, std::size_t size);
-    void TryUnlockPage(PageEntry page_entry, std::size_t size);
-
     void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
                        bool is_safe) const;
     void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
                         bool is_safe);
 
-    [[nodiscard]] static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) {
+    [[nodiscard]] inline std::size_t PageEntryIndex(GPUVAddr gpu_addr) const {
         return (gpu_addr >> page_bits) & page_table_mask;
     }
 
-    static constexpr u64 address_space_size = 1ULL << 40;
-    static constexpr u64 address_space_start = 1ULL << 32;
-    static constexpr u64 address_space_start_low = 1ULL << 16;
-    static constexpr u64 page_bits{16};
-    static constexpr u64 page_size{1 << page_bits};
-    static constexpr u64 page_mask{page_size - 1};
-    static constexpr u64 page_table_bits{24};
-    static constexpr u64 page_table_size{1 << page_table_bits};
-    static constexpr u64 page_table_mask{page_table_size - 1};
-
     Core::System& system;
 
+    const u64 address_space_bits;
+    const u64 page_bits;
+    u64 address_space_size;
+    u64 allocate_start;
+    u64 page_size;
+    u64 page_mask;
+    u64 page_table_mask;
+    static constexpr u64 cpu_page_bits{12};
+
     VideoCore::RasterizerInterface* rasterizer = nullptr;
 
-    std::vector<PageEntry> page_table;
+    enum class EntryType : u64 {
+        Free = 0,
+        Reserved = 1,
+        Mapped = 2,
+    };
+
+    std::vector<u64> entries;
+
+    template <EntryType entry_type>
+    GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+
+    EntryType GetEntry(size_t position) const;
 
-    using MapRange = std::pair<GPUVAddr, size_t>;
-    std::vector<MapRange> map_ranges;
+    void SetEntry(size_t position, EntryType entry);
 
-    std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue;
+    Common::MultiLevelPageTable<u32> page_table;
 };
 
 } // namespace Tegra
-- 
cgit v1.2.3


From feb49c822d9cabc5bc7be9eab1f2bf4ba460176a Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 14 Nov 2021 20:55:52 +0100
Subject: NVDRV: Remake ASGPU

---
 src/common/CMakeLists.txt                          |   2 +
 src/common/address_space.cpp                       |  11 +
 src/common/address_space.h                         | 134 ++++++
 src/common/address_space.inc                       | 338 +++++++++++++++
 .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp    | 460 ++++++++++++++-------
 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 163 ++++----
 src/video_core/memory_manager.cpp                  |  10 +-
 src/video_core/memory_manager.h                    |   3 +-
 8 files changed, 882 insertions(+), 239 deletions(-)
 create mode 100644 src/common/address_space.cpp
 create mode 100644 src/common/address_space.h
 create mode 100644 src/common/address_space.inc

(limited to 'src/common')

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2db414819..a02696873 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -17,6 +17,8 @@ endif ()
 include(GenerateSCMRev)
 
 add_library(common STATIC
+    address_space.cpp
+    address_space.h
     algorithm.h
     alignment.h
     announce_multiplayer_room.h
diff --git a/src/common/address_space.cpp b/src/common/address_space.cpp
new file mode 100644
index 000000000..6db85be87
--- /dev/null
+++ b/src/common/address_space.cpp
@@ -0,0 +1,11 @@
+// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
+// Licensed under GPLv3 or any later version
+// Refer to the license.txt file included.
+
+#include "common/address_space.inc"
+
+namespace Common {
+
+template class Common::FlatAllocator<u32, 0, 32>;
+
+}
diff --git a/src/common/address_space.h b/src/common/address_space.h
new file mode 100644
index 000000000..fd2f32b7d
--- /dev/null
+++ b/src/common/address_space.h
@@ -0,0 +1,134 @@
+// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
+// Licensed under GPLv3 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <concepts>
+#include <functional>
+#include <mutex>
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Common {
+template <typename VaType, size_t AddressSpaceBits>
+concept AddressSpaceValid = std::is_unsigned_v<VaType> && sizeof(VaType) * 8 >= AddressSpaceBits;
+
+struct EmptyStruct {};
+
+/**
+ * @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector
+ */
+template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,
+          bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo = EmptyStruct>
+requires AddressSpaceValid<VaType, AddressSpaceBits> class FlatAddressSpaceMap {
+private:
+    std::function<void(VaType, VaType)>
+        unmapCallback{}; //!< Callback called when the mappings in a region have changed
+
+protected:
+    /**
+     * @brief Represents a block of memory in the AS, the physical mapping is contiguous until
+     * another block with a different phys address is hit
+     */
+    struct Block {
+        VaType virt{UnmappedVa}; //!< VA of the block
+        PaType phys{UnmappedPa}; //!< PA of the block, will increase 1-1 with VA until a new block
+                                 //!< is encountered
+        [[no_unique_address]] ExtraBlockInfo extraInfo;
+
+        Block() = default;
+
+        Block(VaType virt, PaType phys, ExtraBlockInfo extraInfo)
+            : virt(virt), phys(phys), extraInfo(extraInfo) {}
+
+        constexpr bool Valid() const {
+            return virt != UnmappedVa;
+        }
+
+        constexpr bool Mapped() const {
+            return phys != UnmappedPa;
+        }
+
+        constexpr bool Unmapped() const {
+            return phys == UnmappedPa;
+        }
+
+        bool operator<(const VaType& pVirt) const {
+            return virt < pVirt;
+        }
+    };
+
+    std::mutex blockMutex;
+    std::vector<Block> blocks{Block{}};
+
+    /**
+     * @brief Maps a PA range into the given AS region
+     * @note blockMutex MUST be locked when calling this
+     */
+    void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo);
+
+    /**
+     * @brief Unmaps the given range and merges it with other unmapped regions
+     * @note blockMutex MUST be locked when calling this
+     */
+    void UnmapLocked(VaType virt, VaType size);
+
+public:
+    static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) +
+                                      ((1ULL << (AddressSpaceBits - 1)) -
+                                       1)}; //!< The maximum VA that this AS can technically reach
+
+    VaType vaLimit{VaMaximum}; //!< A soft limit on the maximum VA of the AS
+
+    FlatAddressSpaceMap(VaType vaLimit, std::function<void(VaType, VaType)> unmapCallback = {});
+
+    FlatAddressSpaceMap() = default;
+
+    void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo = {}) {
+        std::scoped_lock lock(blockMutex);
+        MapLocked(virt, phys, size, extraInfo);
+    }
+
+    void Unmap(VaType virt, VaType size) {
+        std::scoped_lock lock(blockMutex);
+        UnmapLocked(virt, size);
+    }
+};
+
+/**
+ * @brief FlatAllocator specialises FlatAddressSpaceMap to work as an allocator, with an
+ * initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block
+ */
+template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>
+requires AddressSpaceValid<VaType, AddressSpaceBits> class FlatAllocator
+    : public FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits> {
+private:
+    using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>;
+
+    VaType currentLinearAllocEnd; //!< The end address for the initial linear allocation pass, once
+                                  //!< this reaches the AS limit the slower allocation path will be
+                                  //!< used
+
+public:
+    VaType vaStart; //!< The base VA of the allocator, no allocations will be below this
+
+    FlatAllocator(VaType vaStart, VaType vaLimit = Base::VaMaximum);
+
+    /**
+     * @brief Allocates a region in the AS of the given size and returns its address, or 0 if full
+     */
+    VaType Allocate(VaType size);
+
+    /**
+     * @brief Marks the given region in the AS as allocated
+     */
+    void AllocateFixed(VaType virt, VaType size);
+
+    /**
+     * @brief Frees an AS region so it can be used again
+     */
+    void Free(VaType virt, VaType size);
+};
+} // namespace Common
diff --git a/src/common/address_space.inc b/src/common/address_space.inc
new file mode 100644
index 000000000..907c55d88
--- /dev/null
+++ b/src/common/address_space.inc
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPLv3 or later
+// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#include "common/address_space.h"
+#include "common/assert.h"
+
+#define MAP_MEMBER(returnType)                                                                     \
+    template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,              \
+              bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo>                \
+    requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap<           \
+        VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
+#define MAP_MEMBER_CONST()                                                                         \
+    template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,              \
+              bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo>                \
+    requires AddressSpaceValid<VaType, AddressSpaceBits> FlatAddressSpaceMap<                      \
+        VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
+
+#define MM_MEMBER(returnType)                                                                      \
+    template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>                         \
+    requires AddressSpaceValid<VaType, AddressSpaceBits> returnType                                \
+        FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>
+
+#define ALLOC_MEMBER(returnType)                                                                   \
+    template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>                         \
+    requires AddressSpaceValid<VaType, AddressSpaceBits> returnType                                \
+        FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
+#define ALLOC_MEMBER_CONST()                                                                       \
+    template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>                         \
+    requires AddressSpaceValid<VaType, AddressSpaceBits>                                           \
+        FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
+
+namespace Common {
+MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType vaLimit,
+                                        std::function<void(VaType, VaType)> unmapCallback)
+    : unmapCallback(std::move(unmapCallback)), vaLimit(vaLimit) {
+    if (vaLimit > VaMaximum)
+        UNREACHABLE_MSG("Invalid VA limit!");
+}
+
+MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo) {
+    VaType virtEnd{virt + size};
+
+    if (virtEnd > vaLimit)
+        UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}",
+                        virtEnd, vaLimit);
+
+    auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
+    if (blockEndSuccessor == blocks.begin())
+        UNREACHABLE_MSG("Trying to map a block before the VA start: virtEnd: 0x{:X}", virtEnd);
+
+    auto blockEndPredecessor{std::prev(blockEndSuccessor)};
+
+    if (blockEndSuccessor != blocks.end()) {
+        // We have blocks in front of us, if one is directly in front then we don't have to add a
+        // tail
+        if (blockEndSuccessor->virt != virtEnd) {
+            PaType tailPhys{[&]() -> PaType {
+                if constexpr (!PaContigSplit) {
+                    return blockEndPredecessor
+                        ->phys; // Always propagate unmapped regions rather than calculating offset
+                } else {
+                    if (blockEndPredecessor->Unmapped())
+                        return blockEndPredecessor->phys; // Always propagate unmapped regions
+                                                          // rather than calculating offset
+                    else
+                        return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
+                }
+            }()};
+
+            if (blockEndPredecessor->virt >= virt) {
+                // If this block's start would be overlapped by the map then reuse it as a tail
+                // block
+                blockEndPredecessor->virt = virtEnd;
+                blockEndPredecessor->phys = tailPhys;
+                // extraInfo is left untouched: the tail inherits it from this very block
+
+                // No longer predecessor anymore
+                blockEndSuccessor = blockEndPredecessor--;
+            } else {
+                // Else insert a new one and we're done
+                blocks.insert(blockEndSuccessor,
+                              {Block(virt, phys, extraInfo),
+                               Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)});
+                if (unmapCallback)
+                    unmapCallback(virt, size);
+
+                return;
+            }
+        }
+    } else {
+        // blockEndPredecessor will always be unmapped as blocks has to be terminated by an unmapped
+        // chunk
+        if (blockEndPredecessor != blocks.begin() && blockEndPredecessor->virt >= virt) {
+            // Move the unmapped block start backwards
+            blockEndPredecessor->virt = virtEnd;
+
+            // No longer predecessor anymore
+            blockEndSuccessor = blockEndPredecessor--;
+        } else {
+            // Else insert a new one and we're done
+            blocks.insert(blockEndSuccessor,
+                          {Block(virt, phys, extraInfo), Block(virtEnd, UnmappedPa, {})});
+            if (unmapCallback)
+                unmapCallback(virt, size);
+
+            return;
+        }
+    }
+
+    auto blockStartSuccessor{blockEndSuccessor};
+
+    // Walk the block vector to find the start successor as this is more efficient than another
+    // binary search in most scenarios
+    while (std::prev(blockStartSuccessor)->virt >= virt)
+        blockStartSuccessor--;
+
+    // Check that the start successor is either the end block or something in between
+    if (blockStartSuccessor->virt > virtEnd) {
+        UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt);
+    } else if (blockStartSuccessor->virt == virtEnd) {
+        // We need to create a new block as there are none spare that we would overwrite
+        blocks.insert(blockStartSuccessor, Block(virt, phys, extraInfo));
+    } else {
+        // Erase overwritten blocks
+        if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor)
+            blocks.erase(eraseStart, blockEndSuccessor);
+
+        // Reuse a block that would otherwise be overwritten as a start block
+        blockStartSuccessor->virt = virt;
+        blockStartSuccessor->phys = phys;
+        blockStartSuccessor->extraInfo = extraInfo;
+    }
+
+    if (unmapCallback)
+        unmapCallback(virt, size);
+}
+
+MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
+    VaType virtEnd{virt + size};
+
+    if (virtEnd > vaLimit)
+        UNREACHABLE_MSG("Trying to unmap a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}",
+                        virtEnd, vaLimit);
+
+    auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
+    if (blockEndSuccessor == blocks.begin())
+        UNREACHABLE_MSG("Trying to unmap a block before the VA start: virtEnd: 0x{:X}", virtEnd);
+
+    auto blockEndPredecessor{std::prev(blockEndSuccessor)};
+
+    auto walkBackToPredecessor{[&](auto iter) {
+        while (iter->virt >= virt)
+            iter--;
+
+        return iter;
+    }};
+
+    auto eraseBlocksWithEndUnmapped{[&](auto unmappedEnd) {
+        auto blockStartPredecessor{walkBackToPredecessor(unmappedEnd)};
+        auto blockStartSuccessor{std::next(blockStartPredecessor)};
+
+        auto eraseEnd{[&]() {
+            if (blockStartPredecessor->Unmapped()) {
+                // If the start predecessor is unmapped then we can erase everything in our region
+                // and be done
+                return std::next(unmappedEnd);
+            } else {
+                // Else reuse the end predecessor as the start of our unmapped region then erase all
+                // up to it
+                unmappedEnd->virt = virt;
+                return unmappedEnd;
+            }
+        }()};
+
+        // We can't have two unmapped regions after each other
+        if (eraseEnd != blocks.end() &&
+            (eraseEnd == blockStartSuccessor ||
+             (blockStartPredecessor->Unmapped() && eraseEnd->Unmapped())))
+            UNREACHABLE_MSG("Multiple contiguous unmapped regions are unsupported!");
+
+        blocks.erase(blockStartSuccessor, eraseEnd);
+    }};
+
+    // We can avoid any splitting logic if these are the case
+    if (blockEndPredecessor->Unmapped()) {
+        if (blockEndPredecessor->virt > virt)
+            eraseBlocksWithEndUnmapped(blockEndPredecessor);
+
+        if (unmapCallback)
+            unmapCallback(virt, size);
+
+        return; // The region is unmapped, bail out early
+    } else if (blockEndSuccessor == blocks.end()) {
+        // Check before dereferencing blockEndSuccessor: end should always follow an unmapped block
+        UNREACHABLE_MSG("Unexpected Memory Manager state!");
+    } else if (blockEndSuccessor->virt == virtEnd && blockEndSuccessor->Unmapped()) {
+        eraseBlocksWithEndUnmapped(blockEndSuccessor);
+
+        if (unmapCallback)
+            unmapCallback(virt, size);
+
+        return; // The region is unmapped here and doesn't need splitting, bail out early
+    } else if (blockEndSuccessor->virt != virtEnd) {
+        // If one block is directly in front then we don't have to add a tail
+
+        // The previous block is mapped so we will need to add a tail with an offset
+        PaType tailPhys{[&]() {
+            if constexpr (PaContigSplit)
+                return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
+            else
+                return blockEndPredecessor->phys;
+        }()};
+
+        if (blockEndPredecessor->virt >= virt) {
+            // If this block's start would be overlapped by the unmap then reuse it as a tail block
+            blockEndPredecessor->virt = virtEnd;
+            blockEndPredecessor->phys = tailPhys;
+
+            // No longer predecessor anymore
+            blockEndSuccessor = blockEndPredecessor--;
+        } else {
+            blocks.insert(blockEndSuccessor,
+                          {Block(virt, UnmappedPa, {}),
+                           Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)});
+            if (unmapCallback)
+                unmapCallback(virt, size);
+
+            return; // The previous block is mapped and ends before
+        }
+    }
+
+    // Walk the block vector to find the start predecessor as this is more efficient than another
+    // binary search in most scenarios
+    auto blockStartPredecessor{walkBackToPredecessor(blockEndSuccessor)};
+    auto blockStartSuccessor{std::next(blockStartPredecessor)};
+
+    if (blockStartSuccessor->virt > virtEnd) {
+        UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt);
+    } else if (blockStartSuccessor->virt == virtEnd) {
+        // There are no blocks between the start and the end that would let us skip inserting a new
+        // one for head
+
+        // The previous block may be unmapped, if so we don't need to insert any unmaps after it
+        if (blockStartPredecessor->Mapped())
+            blocks.insert(blockStartSuccessor, Block(virt, UnmappedPa, {}));
+    } else if (blockStartPredecessor->Unmapped()) {
+        // If the previous block is unmapped
+        blocks.erase(blockStartSuccessor, blockEndPredecessor);
+    } else {
+        // Erase overwritten blocks, skipping the first one as we have written the unmapped start
+        // block there
+        if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor)
+            blocks.erase(eraseStart, blockEndSuccessor);
+
+        // Add in the unmapped block header
+        blockStartSuccessor->virt = virt;
+        blockStartSuccessor->phys = UnmappedPa;
+    }
+
+    if (unmapCallback)
+        unmapCallback(virt, size);
+}
+
+ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart, VaType vaLimit)
+    : Base(vaLimit), currentLinearAllocEnd(vaStart), vaStart(vaStart) {}
+
+ALLOC_MEMBER(VaType)::Allocate(VaType size) {
+    std::scoped_lock lock(this->blockMutex);
+
+    VaType allocStart{UnmappedVa};
+    VaType allocEnd{currentLinearAllocEnd + size};
+
+    // Avoid searching backwards in the address space if possible
+    if (allocEnd >= currentLinearAllocEnd && allocEnd <= this->vaLimit) {
+        auto allocEndSuccessor{
+            std::lower_bound(this->blocks.begin(), this->blocks.end(), allocEnd)};
+        if (allocEndSuccessor == this->blocks.begin())
+            UNREACHABLE_MSG("First block in AS map is invalid!");
+
+        auto allocEndPredecessor{std::prev(allocEndSuccessor)};
+        if (allocEndPredecessor->virt <= currentLinearAllocEnd) {
+            allocStart = currentLinearAllocEnd;
+        } else {
+            // Skip any fixed mappings in front of us until an unmapped gap that fits is found
+            while (allocEndSuccessor != this->blocks.end()) {
+                if (allocEndSuccessor->virt - allocEndPredecessor->virt >= size &&
+                    allocEndPredecessor->Unmapped()) {
+                    allocStart = allocEndPredecessor->virt;
+                    break;
+                }
+
+                allocEndPredecessor = allocEndSuccessor++;
+
+                // Use the VA limit to calculate if we can fit in the final block since it has no
+                // successor
+                if (allocEndSuccessor == this->blocks.end()) {
+                    allocEnd = allocEndPredecessor->virt + size;
+
+                    if (allocEnd >= allocEndPredecessor->virt && allocEnd <= this->vaLimit)
+                        allocStart = allocEndPredecessor->virt;
+                }
+            }
+        }
+    }
+
+    if (allocStart != UnmappedVa) {
+        currentLinearAllocEnd = allocStart + size;
+    } else { // If linear allocation overflows the AS then find a gap
+        if (this->blocks.size() <= 2)
+            UNREACHABLE_MSG("Unexpected allocator state!");
+
+        auto searchPredecessor{this->blocks.begin()};
+        auto searchSuccessor{std::next(searchPredecessor)};
+
+        while (searchSuccessor != this->blocks.end() &&
+               (searchSuccessor->virt - searchPredecessor->virt < size ||
+                searchPredecessor->Mapped())) {
+            searchPredecessor = searchSuccessor++;
+        }
+
+        if (searchSuccessor != this->blocks.end())
+            allocStart = searchPredecessor->virt;
+        else
+            return {}; // AS is full
+    }
+
+    this->MapLocked(allocStart, true, size, {});
+    return allocStart;
+}
+
+ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
+    this->Map(virt, true, size);
+}
+
+ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
+    this->Unmap(virt, size);
+}
+} // namespace Common
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 5c70c9a57..344ddfc90 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -6,6 +6,7 @@
 #include <cstring>
 #include <utility>
 
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
@@ -21,8 +22,8 @@
 namespace Service::Nvidia::Devices {
 
 nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
-    : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()},
-      gmmu{std::make_shared<Tegra::MemoryManager>(system)} {}
+    : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, vm{},
+      gmmu{} {}
 nvhost_as_gpu::~nvhost_as_gpu() = default;
 
 NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -89,12 +90,49 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
     IoctlAllocAsEx params{};
     std::memcpy(&params, input.data(), input.size());
 
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x{:X}", params.big_page_size);
-    if (params.big_page_size == 0) {
-        params.big_page_size = DEFAULT_BIG_PAGE_SIZE;
+    LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size);
+
+    std::scoped_lock lock(mutex);
+
+    if (vm.initialised) {
+        UNREACHABLE_MSG("Cannot initialise an address space twice!");
+        return NvResult::InvalidState;
     }
 
-    big_page_size = params.big_page_size;
+    if (params.big_page_size) {
+        if (!std::has_single_bit(params.big_page_size)) {
+            LOG_ERROR(Service_NVDRV, "Non power-of-2 big page size: 0x{:X}!", params.big_page_size);
+            return NvResult::BadValue;
+        }
+
+        if (!(params.big_page_size & VM::SUPPORTED_BIG_PAGE_SIZES)) {
+            LOG_ERROR(Service_NVDRV, "Unsupported big page size: 0x{:X}!", params.big_page_size);
+            return NvResult::BadValue;
+        }
+
+        vm.big_page_size = params.big_page_size;
+        vm.big_page_size_bits = static_cast<u32>(std::countr_zero(params.big_page_size));
+
+        vm.va_range_start = params.big_page_size << VM::VA_START_SHIFT;
+    }
+
+    // If this is unspecified then default values should be used
+    if (params.va_range_start) {
+        vm.va_range_start = params.va_range_start;
+        vm.va_range_split = params.va_range_split;
+        vm.va_range_end = params.va_range_end;
+    }
+
+    const u64 start_pages{vm.va_range_start >> VM::PAGE_SIZE_BITS};
+    const u64 end_pages{vm.va_range_split >> VM::PAGE_SIZE_BITS};
+    vm.small_page_allocator = std::make_shared<VM::Allocator>(start_pages, end_pages);
+
+    const u64 start_big_pages{vm.va_range_split >> vm.big_page_size_bits};
+    const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits};
+    vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages);
+
+    gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, VM::PAGE_SIZE_BITS);
+    vm.initialised = true;
 
     return NvResult::Success;
 }
@@ -106,21 +144,73 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
     LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages,
               params.page_size, params.flags);
 
-    const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
-    if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) {
-        params.offset = *(gmmu->AllocateFixed(params.offset, size));
+    std::scoped_lock lock(mutex);
+
+    if (!vm.initialised) {
+        return NvResult::BadValue;
+    }
+
+    if (params.page_size != VM::YUZU_PAGESIZE && params.page_size != vm.big_page_size) {
+        return NvResult::BadValue;
+    }
+
+    if (params.page_size != vm.big_page_size &&
+        ((params.flags & MappingFlags::Sparse) != MappingFlags::None)) {
+        UNIMPLEMENTED_MSG("Sparse small pages are not implemented!");
+        return NvResult::NotImplemented;
+    }
+
+    const u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS
+                                                                   : vm.big_page_size_bits};
+
+    auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator
+                                                          : *vm.big_page_allocator};
+
+    if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
+        allocator.AllocateFixed(static_cast<u32>(params.offset >> page_size_bits), params.pages);
     } else {
-        params.offset = gmmu->Allocate(size, params.align);
+        params.offset = static_cast<u64>(allocator.Allocate(params.pages)) << page_size_bits;
+        if (!params.offset) {
+            UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!");
+            return NvResult::InsufficientMemory;
+        }
     }
 
-    auto result = NvResult::Success;
-    if (!params.offset) {
-        LOG_CRITICAL(Service_NVDRV, "allocation failed for size {}", size);
-        result = NvResult::InsufficientMemory;
+    u64 size{static_cast<u64>(params.pages) * params.page_size};
+
+    if ((params.flags & MappingFlags::Sparse) != MappingFlags::None) {
+        gmmu->MapSparse(params.offset, size);
     }
 
+    allocation_map[params.offset] = {
+        .size = size,
+        .page_size = params.page_size,
+        .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None,
+    };
+
     std::memcpy(output.data(), &params, output.size());
-    return result;
+    return NvResult::Success;
+}
+
+void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
+    auto mapping{mapping_map.at(offset)};
+
+    if (!mapping->fixed) { // Non-fixed mappings return their VA range to the page allocator
+        auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
+        u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
+
+        allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits),
+                       static_cast<u32>(mapping->size >> page_size_bits));
+    }
+
+    // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
+    // Only FreeSpace can unmap them fully (it calls gmmu->Unmap when allocation.sparse is set)
+    if (mapping->sparse_alloc)
+        gmmu->MapSparse(offset, mapping->size);
+    else
+        gmmu->Unmap(offset, mapping->size);
+
+    mapping_map.erase(offset);
+}
 
 NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -130,7 +220,40 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
     LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset,
               params.pages, params.page_size);
 
-    gmmu->Unmap(params.offset, static_cast<std::size_t>(params.pages) * params.page_size);
+    std::scoped_lock lock(mutex);
+
+    if (!vm.initialised) {
+        return NvResult::BadValue;
+    }
+
+    try {
+        auto allocation{allocation_map.at(params.offset)}; // Throws if nothing is allocated here
+
+        if (allocation.page_size != params.page_size ||
+            allocation.size != (static_cast<u64>(params.pages) * params.page_size)) {
+            return NvResult::BadValue;
+        }
+
+        for (const auto& mapping : allocation.mappings) {
+            FreeMappingLocked(mapping->offset);
+        }
+
+        // Unset sparse flag if required
+        if (allocation.sparse) {
+            gmmu->Unmap(params.offset, allocation.size);
+        }
+
+        auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator
+                                                              : *vm.big_page_allocator};
+        u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS
+                                                                 : vm.big_page_size_bits};
+
+        allocator.Free(static_cast<u32>(params.offset >> page_size_bits),
+                       static_cast<u32>(allocation.size >> page_size_bits));
+        allocation_map.erase(params.offset);
+    } catch ([[maybe_unused]] const std::out_of_range& e) {
+        return NvResult::BadValue;
+    }
 
     std::memcpy(output.data(), &params, output.size());
     return NvResult::Success;
@@ -141,43 +264,51 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
 
     LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries);
 
-    auto result = NvResult::Success;
     std::vector<IoctlRemapEntry> entries(num_entries);
     std::memcpy(entries.data(), input.data(), input.size());
 
+    std::scoped_lock lock(mutex);
+
+    if (!vm.initialised) {
+        return NvResult::BadValue;
+    }
+
     for (const auto& entry : entries) {
-        LOG_DEBUG(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
-                  entry.offset, entry.nvmap_handle, entry.pages);
-
-        if (entry.nvmap_handle == 0) {
-            // If nvmap handle is null, we should unmap instead.
-            const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
-            const auto size{static_cast<u64>(entry.pages) << 0x10};
-            gmmu->Unmap(offset, size);
-            continue;
+        GPUVAddr virtual_address{static_cast<u64>(entry.as_offset_big_pages)
+                                 << vm.big_page_size_bits};
+        u64 size{static_cast<u64>(entry.big_pages) << vm.big_page_size_bits};
+
+        auto alloc{allocation_map.upper_bound(virtual_address)};
+
+        if (alloc-- == allocation_map.begin() ||
+            (virtual_address - alloc->first) + size > alloc->second.size) {
+            LOG_WARNING(Service_NVDRV, "Cannot remap into an unallocated region!");
+            return NvResult::BadValue;
         }
 
-        const auto object{nvmap.GetHandle(entry.nvmap_handle)};
-        if (!object) {
-            LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", entry.nvmap_handle);
-            result = NvResult::InvalidState;
-            break;
+        if (!alloc->second.sparse) {
+            LOG_WARNING(Service_NVDRV, "Cannot remap a non-sparse mapping!");
+            return NvResult::BadValue;
         }
 
-        const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
-        const auto size{static_cast<u64>(entry.pages) << 0x10};
-        const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10};
-        const auto addr{gmmu->Map(object->address + map_offset, offset, size)};
+        if (!entry.handle) {
+            gmmu->MapSparse(virtual_address, size);
+        } else {
+            auto handle{nvmap.GetHandle(entry.handle)};
+            if (!handle) {
+                return NvResult::BadValue;
+            }
 
-        if (!addr) {
-            LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!");
-            result = NvResult::InvalidState;
-            break;
+            VAddr cpu_address{static_cast<VAddr>(
+                handle->address +
+                (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
+
+            gmmu->Map(virtual_address, cpu_address, size);
         }
     }
 
     std::memcpy(output.data(), entries.data(), output.size());
-    return result;
+    return NvResult::Success;
 }
 
 NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -187,75 +318,96 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
     LOG_DEBUG(Service_NVDRV,
               "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}"
               ", offset={}",
-              params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size,
+              params.flags, params.handle, params.buffer_offset, params.mapping_size,
               params.offset);
 
-    if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) {
-        if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) {
-            const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)};
-            const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)};
+    std::scoped_lock lock(mutex);
+
+    if (!vm.initialised) {
+        return NvResult::BadValue;
+    }
 
-            if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) {
-                LOG_CRITICAL(Service_NVDRV,
-                             "remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, "
-                             "mapping_size = {}, offset={}",
-                             params.flags, params.nvmap_handle, params.buffer_offset,
-                             params.mapping_size, params.offset);
+    // Remaps a subregion of an existing mapping to a different PA
+    if ((params.flags & MappingFlags::Remap) != MappingFlags::None) {
+        try {
+            auto mapping{mapping_map.at(params.offset)};
 
-                std::memcpy(output.data(), &params, output.size());
-                return NvResult::InvalidState;
+            if (mapping->size < params.mapping_size) {
+                LOG_WARNING(Service_NVDRV,
+                            "Cannot remap a partially mapped GPU address space region: 0x{:X}",
+                            params.offset);
+                return NvResult::BadValue;
             }
 
-            std::memcpy(output.data(), &params, output.size());
-            return NvResult::Success;
-        } else {
-            LOG_CRITICAL(Service_NVDRV, "address not mapped offset={}", params.offset);
+            u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
+            VAddr cpu_address{mapping->ptr + params.buffer_offset};
+
+            gmmu->Map(gpu_address, cpu_address, params.mapping_size);
 
-            std::memcpy(output.data(), &params, output.size());
-            return NvResult::InvalidState;
+            return NvResult::Success;
+        } catch ([[maybe_unused]] const std::out_of_range& e) {
+            LOG_WARNING(Service_NVDRV, "Cannot remap an unmapped GPU address space region: 0x{:X}",
+                        params.offset);
+            return NvResult::BadValue;
         }
     }
 
-    const auto object{nvmap.GetHandle(params.nvmap_handle)};
-    if (!object) {
-        LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", params.nvmap_handle);
-        std::memcpy(output.data(), &params, output.size());
-        return NvResult::InvalidState;
+    auto handle{nvmap.GetHandle(params.handle)};
+    if (!handle) {
+        return NvResult::BadValue;
     }
 
-    // The real nvservices doesn't make a distinction between handles and ids, and
-    // object can only have one handle and it will be the same as its id. Assert that this is the
-    // case to prevent unexpected behavior.
-    ASSERT(object->id == params.nvmap_handle);
+    VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)};
+    u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};
 
-    u64 page_size{params.page_size};
-    if (!page_size) {
-        page_size = object->align;
-    }
+    if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
+        auto alloc{allocation_map.upper_bound(params.offset)};
 
-    const auto physical_address{object->address + params.buffer_offset};
-    u64 size{params.mapping_size};
-    if (!size) {
-        size = object->size;
-    }
+        if (alloc-- == allocation_map.begin() ||
+            (params.offset - alloc->first) + size > alloc->second.size) {
+            UNREACHABLE_MSG("Cannot perform a fixed mapping into an unallocated region!");
+            return NvResult::BadValue;
+        }
 
-    const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None};
-    if (is_alloc) {
-        params.offset = gmmu->MapAllocate(physical_address, size, page_size);
-    } else {
-        params.offset = gmmu->Map(physical_address, params.offset, size);
-    }
+        gmmu->Map(params.offset, cpu_address, size);
 
-    auto result = NvResult::Success;
-    if (!params.offset) {
-        LOG_CRITICAL(Service_NVDRV, "failed to map size={}", size);
-        result = NvResult::InvalidState;
+        auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, false,
+                                               alloc->second.sparse)};
+        alloc->second.mappings.push_back(mapping);
+        mapping_map[params.offset] = mapping;
     } else {
-        AddBufferMap(params.offset, size, physical_address, is_alloc);
+        bool big_page{[&]() {
+            if (Common::IsAligned(handle->align, vm.big_page_size))
+                return true;
+            else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE))
+                return false;
+            else {
+                UNREACHABLE();
+                return false;
+            }
+        }()};
+
+        auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
+        u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
+        u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
+
+        params.offset = static_cast<u64>(allocator.Allocate(
+                            static_cast<u32>(Common::AlignUp(size, page_size) >> page_size_bits)))
+                        << page_size_bits;
+        if (!params.offset) {
+            UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!");
+            return NvResult::InsufficientMemory;
+        }
+
+        gmmu->Map(params.offset, cpu_address, size);
+
+        auto mapping{
+            std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
+        mapping_map[params.offset] = mapping;
     }
 
     std::memcpy(output.data(), &params, output.size());
-    return result;
+    return NvResult::Success;
 }
 
 NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -264,13 +416,36 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
 
     LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
 
-    if (const auto size{RemoveBufferMap(params.offset)}; size) {
-        gmmu->Unmap(params.offset, *size);
-    } else {
-        LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset);
+    std::scoped_lock lock(mutex);
+
+    if (!vm.initialised) {
+        return NvResult::BadValue;
+    }
+
+    try {
+        auto mapping{mapping_map.at(params.offset)};
+
+        if (!mapping->fixed) {
+            auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
+            u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
+
+            allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits),
+                           static_cast<u32>(mapping->size >> page_size_bits));
+        }
+
+        // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
+        // Only FreeSpace can unmap them fully
+        if (mapping->sparse_alloc) {
+            gmmu->MapSparse(params.offset, mapping->size);
+        } else {
+            gmmu->Unmap(params.offset, mapping->size);
+        }
+
+        mapping_map.erase(params.offset);
+    } catch ([[maybe_unused]] const std::out_of_range& e) {
+        LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset);
     }
 
-    std::memcpy(output.data(), &params, output.size());
     return NvResult::Success;
 }
 
@@ -284,28 +459,37 @@ NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8
     return NvResult::Success;
 }
 
+void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) {
+    params.buf_size = 2 * sizeof(VaRegion);
+
+    params.regions = std::array<VaRegion, 2>{
+        VaRegion{
+            .offset = vm.small_page_allocator->vaStart << VM::PAGE_SIZE_BITS,
+            .page_size = VM::YUZU_PAGESIZE,
+            .pages = vm.small_page_allocator->vaLimit - vm.small_page_allocator->vaStart,
+        },
+        VaRegion{
+            .offset = vm.big_page_allocator->vaStart << vm.big_page_size_bits,
+            .page_size = vm.big_page_size,
+            .pages = vm.big_page_allocator->vaLimit - vm.big_page_allocator->vaStart,
+        },
+    };
+}
+
 NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) {
     IoctlGetVaRegions params{};
     std::memcpy(&params, input.data(), input.size());
 
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
-                params.buf_size);
-
-    params.buf_size = 0x30;
+    LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
+              params.buf_size);
 
-    params.small = IoctlVaRegion{
-        .offset = 0x04000000,
-        .page_size = DEFAULT_SMALL_PAGE_SIZE,
-        .pages = 0x3fbfff,
-    };
+    std::scoped_lock lock(mutex);
 
-    params.big = IoctlVaRegion{
-        .offset = 0x04000000,
-        .page_size = big_page_size,
-        .pages = 0x1bffff,
-    };
+    if (!vm.initialised) {
+        return NvResult::BadValue;
+    }
 
-    // TODO(ogniK): This probably can stay stubbed but should add support way way later
+    GetVARegionsImpl(params);
 
     std::memcpy(output.data(), &params, output.size());
     return NvResult::Success;
@@ -316,64 +500,24 @@ NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u
     IoctlGetVaRegions params{};
     std::memcpy(&params, input.data(), input.size());
 
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
-                params.buf_size);
+    LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
+              params.buf_size);
 
-    params.buf_size = 0x30;
-
-    params.small = IoctlVaRegion{
-        .offset = 0x04000000,
-        .page_size = 0x1000,
-        .pages = 0x3fbfff,
-    };
+    std::scoped_lock lock(mutex);
 
-    params.big = IoctlVaRegion{
-        .offset = 0x04000000,
-        .page_size = big_page_size,
-        .pages = 0x1bffff,
-    };
+    if (!vm.initialised) {
+        return NvResult::BadValue;
+    }
 
-    // TODO(ogniK): This probably can stay stubbed but should add support way way later
+    GetVARegionsImpl(params);
 
     std::memcpy(output.data(), &params, output.size());
-    std::memcpy(inline_output.data(), &params.small, sizeof(IoctlVaRegion));
-    std::memcpy(inline_output.data() + sizeof(IoctlVaRegion), &params.big, sizeof(IoctlVaRegion));
+    std::memcpy(inline_output.data(), &params.regions[0], sizeof(VaRegion));
+    std::memcpy(inline_output.data() + sizeof(VaRegion), &params.regions[1], sizeof(VaRegion));
 
     return NvResult::Success;
 }
 
-std::optional<nvhost_as_gpu::BufferMap> nvhost_as_gpu::FindBufferMap(GPUVAddr gpu_addr) const {
-    const auto end{buffer_mappings.upper_bound(gpu_addr)};
-    for (auto iter{buffer_mappings.begin()}; iter != end; ++iter) {
-        if (gpu_addr >= iter->second.StartAddr() && gpu_addr < iter->second.EndAddr()) {
-            return iter->second;
-        }
-    }
-
-    return std::nullopt;
-}
-
-void nvhost_as_gpu::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr,
-                                 bool is_allocated) {
-    buffer_mappings[gpu_addr] = {gpu_addr, size, cpu_addr, is_allocated};
-}
-
-std::optional<std::size_t> nvhost_as_gpu::RemoveBufferMap(GPUVAddr gpu_addr) {
-    if (const auto iter{buffer_mappings.find(gpu_addr)}; iter != buffer_mappings.end()) {
-        std::size_t size{};
-
-        if (iter->second.IsAllocated()) {
-            size = iter->second.Size();
-        }
-
-        buffer_mappings.erase(iter);
-
-        return size;
-    }
-
-    return std::nullopt;
-}
-
 Kernel::KEvent* nvhost_as_gpu::QueryEvent(u32 event_id) {
     LOG_CRITICAL(Service_NVDRV, "Unknown AS GPU Event {}", event_id);
     return nullptr;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index f5fb33ba7..1d27739e2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -5,14 +5,19 @@
 
 #pragma once
 
+#include <bit>
+#include <list>
 #include <map>
 #include <memory>
+#include <mutex>
 #include <optional>
 #include <vector>
 
+#include "common/address_space.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
+#include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 
 namespace Tegra {
@@ -30,17 +35,13 @@ class NvMap;
 
 namespace Service::Nvidia::Devices {
 
-constexpr u32 DEFAULT_BIG_PAGE_SIZE = 1 << 16;
-constexpr u32 DEFAULT_SMALL_PAGE_SIZE = 1 << 12;
-
-class nvmap;
-
-enum class AddressSpaceFlags : u32 {
-    None = 0x0,
-    FixedOffset = 0x1,
-    Remap = 0x100,
+enum class MappingFlags : u32 {
+    None = 0,
+    Fixed = 1 << 0,
+    Sparse = 1 << 1,
+    Remap = 1 << 8,
 };
-DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags);
+DECLARE_ENUM_FLAG_OPERATORS(MappingFlags);
 
 class nvhost_as_gpu final : public nvdevice {
 public:
@@ -59,46 +60,15 @@ public:
 
     Kernel::KEvent* QueryEvent(u32 event_id) override;
 
-private:
-    class BufferMap final {
-    public:
-        constexpr BufferMap() = default;
-
-        constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_)
-            : start_addr{start_addr_}, end_addr{start_addr_ + size_} {}
-
-        constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_, VAddr cpu_addr_,
-                            bool is_allocated_)
-            : start_addr{start_addr_}, end_addr{start_addr_ + size_}, cpu_addr{cpu_addr_},
-              is_allocated{is_allocated_} {}
-
-        constexpr VAddr StartAddr() const {
-            return start_addr;
-        }
-
-        constexpr VAddr EndAddr() const {
-            return end_addr;
-        }
-
-        constexpr std::size_t Size() const {
-            return end_addr - start_addr;
-        }
-
-        constexpr VAddr CpuAddr() const {
-            return cpu_addr;
-        }
-
-        constexpr bool IsAllocated() const {
-            return is_allocated;
-        }
-
-    private:
-        GPUVAddr start_addr{};
-        GPUVAddr end_addr{};
-        VAddr cpu_addr{};
-        bool is_allocated{};
+    struct VaRegion {
+        u64 offset;
+        u32 page_size;
+        u32 _pad0_;
+        u64 pages;
     };
+    static_assert(sizeof(VaRegion) == 0x18);
 
+private:
     struct IoctlAllocAsEx {
         u32_le flags{}; // usually passes 1
         s32_le as_fd{}; // ignored; passes 0
@@ -113,7 +83,7 @@ private:
     struct IoctlAllocSpace {
         u32_le pages{};
         u32_le page_size{};
-        AddressSpaceFlags flags{};
+        MappingFlags flags{};
         INSERT_PADDING_WORDS(1);
         union {
             u64_le offset;
@@ -130,19 +100,19 @@ private:
     static_assert(sizeof(IoctlFreeSpace) == 16, "IoctlFreeSpace is incorrect size");
 
     struct IoctlRemapEntry {
-        u16_le flags{};
-        u16_le kind{};
-        u32_le nvmap_handle{};
-        u32_le map_offset{};
-        u32_le offset{};
-        u32_le pages{};
+        u16 flags;
+        u16 kind;
+        NvCore::NvMap::Handle::Id handle;
+        u32 handle_offset_big_pages;
+        u32 as_offset_big_pages;
+        u32 big_pages;
     };
     static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size");
 
     struct IoctlMapBufferEx {
-        AddressSpaceFlags flags{}; // bit0: fixed_offset, bit2: cacheable
-        u32_le kind{};             // -1 is default
-        u32_le nvmap_handle{};
+        MappingFlags flags{}; // bit0: fixed_offset, bit2: cacheable
+        u32_le kind{};        // -1 is default
+        NvCore::NvMap::Handle::Id handle;
         u32_le page_size{}; // 0 means don't care
         s64_le buffer_offset{};
         u64_le mapping_size{};
@@ -160,27 +130,15 @@ private:
     };
     static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size");
 
-    struct IoctlVaRegion {
-        u64_le offset{};
-        u32_le page_size{};
-        INSERT_PADDING_WORDS(1);
-        u64_le pages{};
-    };
-    static_assert(sizeof(IoctlVaRegion) == 24, "IoctlVaRegion is incorrect size");
-
     struct IoctlGetVaRegions {
         u64_le buf_addr{}; // (contained output user ptr on linux, ignored)
         u32_le buf_size{}; // forced to 2*sizeof(struct va_region)
         u32_le reserved{};
-        IoctlVaRegion small{};
-        IoctlVaRegion big{};
+        std::array<VaRegion, 2> regions{};
     };
-    static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2,
+    static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2,
                   "IoctlGetVaRegions is incorrect size");
 
-    s32 channel{};
-    u32 big_page_size{DEFAULT_BIG_PAGE_SIZE};
-
     NvResult AllocAsEx(const std::vector<u8>& input, std::vector<u8>& output);
     NvResult AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output);
     NvResult Remap(const std::vector<u8>& input, std::vector<u8>& output);
@@ -189,23 +147,74 @@ private:
     NvResult FreeSpace(const std::vector<u8>& input, std::vector<u8>& output);
     NvResult BindChannel(const std::vector<u8>& input, std::vector<u8>& output);
 
+    void GetVARegionsImpl(IoctlGetVaRegions& params);
     NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output);
     NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output,
                           std::vector<u8>& inline_output);
 
-    std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const;
-    void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
-    std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
+    void FreeMappingLocked(u64 offset);
 
     Module& module;
 
     NvCore::Container& container;
     NvCore::NvMap& nvmap;
 
+    struct Mapping {
+        VAddr ptr;
+        u64 offset;
+        u64 size;
+        bool fixed;
+        bool big_page; // Only valid if fixed == false
+        bool sparse_alloc;
+
+        Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_)
+            : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_),
+              sparse_alloc(sparse_alloc_) {}
+    };
+
+    struct Allocation {
+        u64 size;
+        std::list<std::shared_ptr<Mapping>> mappings;
+        u32 page_size;
+        bool sparse;
+    };
+
+    std::map<u64, std::shared_ptr<Mapping>>
+        mapping_map; //!< This maps the base addresses of mapped buffers to their total sizes and
+                     //!< mapping type; this is needed as what was originally a single buffer may
+                     //!< have been split into multiple GPU side buffers with the remap flag.
+    std::map<u64, Allocation> allocation_map; //!< Holds allocations created by AllocSpace from
+                                              //!< which fixed buffers can be mapped into
+    std::mutex mutex;                         //!< Locks all AS operations
+
+    struct VM {
+        static constexpr u32 YUZU_PAGESIZE{0x1000};
+        static constexpr u32 PAGE_SIZE_BITS{std::countr_zero(YUZU_PAGESIZE)};
+
+        static constexpr u32 SUPPORTED_BIG_PAGE_SIZES{0x30000};
+        static constexpr u32 DEFAULT_BIG_PAGE_SIZE{0x20000};
+        u32 big_page_size{DEFAULT_BIG_PAGE_SIZE};
+        u32 big_page_size_bits{std::countr_zero(DEFAULT_BIG_PAGE_SIZE)};
+
+        static constexpr u32 VA_START_SHIFT{10};
+        static constexpr u64 DEFAULT_VA_SPLIT{1ULL << 34};
+        static constexpr u64 DEFAULT_VA_RANGE{1ULL << 37};
+        u64 va_range_start{DEFAULT_BIG_PAGE_SIZE << VA_START_SHIFT};
+        u64 va_range_split{DEFAULT_VA_SPLIT};
+        u64 va_range_end{DEFAULT_VA_RANGE};
+
+        using Allocator = Common::FlatAllocator<u32, 0, 32>;
+
+        std::unique_ptr<Allocator> big_page_allocator;
+        std::shared_ptr<Allocator>
+            small_page_allocator; //!< Shared as this is also used by nvhost::GpuChannel
+
+        bool initialised{};
+    } vm;
     std::shared_ptr<Tegra::MemoryManager> gmmu;
 
-    // This is expected to be ordered, therefore we must use a map, not unordered_map
-    std::map<GPUVAddr, BufferMap> buffer_mappings;
+    // s32 channel{};
+    // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
 };
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 9e946d448..fc68bcc73 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -71,18 +71,22 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
     rasterizer = rasterizer_;
 }
 
-GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
+GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
     return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
 }
 
+GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size) {
+    return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
+}
+
 GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) {
-    return Map(cpu_addr, *FindFreeRange(size, align), size);
+    return Map(*FindFreeRange(size, align), cpu_addr, size);
 }
 
 GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) {
     const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true);
     ASSERT(gpu_addr);
-    return Map(cpu_addr, *gpu_addr, size);
+    return Map(*gpu_addr, cpu_addr, size);
 }
 
 void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 0a763fd19..b8878476a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -88,7 +88,8 @@ public:
     std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
                                                                     std::size_t size) const;
 
-    [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
+    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size);
+    GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size);
     [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
     [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
     [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
-- 
cgit v1.2.3


From e462191482c6507daed67802c6c1d2c50f10c96e Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 17 Dec 2021 16:45:06 +0100
Subject: Refactor VideoCore to use AS separate from Channel.

---
 src/common/hash.h                                 |   7 ++
 src/video_core/CMakeLists.txt                     |   1 +
 src/video_core/control/channel_state_cache.cpp    |   8 +-
 src/video_core/control/channel_state_cache.h      |  28 ++++-
 src/video_core/control/channel_state_cache.inc    |  23 +++-
 src/video_core/memory_manager.cpp                 |   5 +-
 src/video_core/memory_manager.h                   |   9 ++
 src/video_core/texture_cache/texture_cache.cpp    |  16 +++
 src/video_core/texture_cache/texture_cache.h      | 130 ++++++++--------------
 src/video_core/texture_cache/texture_cache_base.h |  96 +++++++---------
 10 files changed, 171 insertions(+), 152 deletions(-)
 create mode 100644 src/video_core/texture_cache/texture_cache.cpp

(limited to 'src/common')

diff --git a/src/common/hash.h b/src/common/hash.h
index b6f3e6d6f..e8fe78b07 100644
--- a/src/common/hash.h
+++ b/src/common/hash.h
@@ -18,4 +18,11 @@ struct PairHash {
     }
 };
 
+template <typename T>
+struct IdentityHash {
+    [[nodiscard]] size_t operator()(T value) const noexcept {
+        return static_cast<size_t>(value);
+    }
+};
+
 } // namespace Common
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e216c51a2..35faa70a0 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -203,6 +203,7 @@ add_library(video_core STATIC
     texture_cache/render_targets.h
     texture_cache/samples_helper.h
     texture_cache/slot_vector.h
+    texture_cache/texture_cache.cpp
     texture_cache/texture_cache.h
     texture_cache/texture_cache_base.h
     texture_cache/types.h
diff --git a/src/video_core/control/channel_state_cache.cpp b/src/video_core/control/channel_state_cache.cpp
index f72a97b2f..ec7ba907c 100644
--- a/src/video_core/control/channel_state_cache.cpp
+++ b/src/video_core/control/channel_state_cache.cpp
@@ -1,5 +1,11 @@
 #include "video_core/control/channel_state_cache.inc"
 
 namespace VideoCommon {
+
+ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
+    : maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
+      gpu_memory{*channel_state.memory_manager} {}
+
 template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
-}
+
+} // namespace VideoCommon
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
index c8298c003..c51040c83 100644
--- a/src/video_core/control/channel_state_cache.h
+++ b/src/video_core/control/channel_state_cache.h
@@ -2,6 +2,7 @@
 
 #include <deque>
 #include <limits>
+#include <mutex>
 #include <unordered_map>
 
 #include "common/common_types.h"
@@ -41,9 +42,10 @@ template <class P>
 class ChannelSetupCaches {
 public:
     /// Operations for seting the channel of execution.
+    virtual ~ChannelSetupCaches();
 
     /// Create channel state.
-    void CreateChannel(Tegra::Control::ChannelState& channel);
+    virtual void CreateChannel(Tegra::Control::ChannelState& channel);
 
     /// Bind a channel for execution.
     void BindToChannel(s32 id);
@@ -51,18 +53,34 @@ public:
     /// Erase channel's state.
     void EraseChannel(s32 id);
 
+    Tegra::MemoryManager* GetFromID(size_t id) const {
+        std::unique_lock<std::mutex> lk(config_mutex);
+        const auto ref = address_spaces.find(id);
+        return ref->second.gpu_memory;
+    }
+
 protected:
     static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
 
-    std::deque<P> channel_storage;
-    std::deque<size_t> free_channel_ids;
-    std::unordered_map<s32, size_t> channel_map;
-
     P* channel_state;
     size_t current_channel_id{UNSET_CHANNEL};
+    size_t current_address_space{};
     Tegra::Engines::Maxwell3D* maxwell3d;
     Tegra::Engines::KeplerCompute* kepler_compute;
     Tegra::MemoryManager* gpu_memory;
+
+    std::deque<P> channel_storage;
+    std::deque<size_t> free_channel_ids;
+    std::unordered_map<s32, size_t> channel_map;
+    struct AddresSpaceRef {
+        size_t ref_count;
+        size_t storage_id;
+        Tegra::MemoryManager* gpu_memory;
+    };
+    std::unordered_map<size_t, AddresSpaceRef> address_spaces;
+    mutable std::mutex config_mutex;
+
+    virtual void OnGPUASRegister([[maybe_unused]] size_t map_id) {}
 };
 
 } // namespace VideoCommon
diff --git a/src/video_core/control/channel_state_cache.inc b/src/video_core/control/channel_state_cache.inc
index 3eb73af9f..185eabc35 100644
--- a/src/video_core/control/channel_state_cache.inc
+++ b/src/video_core/control/channel_state_cache.inc
@@ -6,18 +6,18 @@
 
 namespace VideoCommon {
 
-ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
-    : maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
-      gpu_memory{*channel_state.memory_manager} {}
+template <class P>
+ChannelSetupCaches<P>::~ChannelSetupCaches() = default;
 
 template <class P>
 void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+    std::unique_lock<std::mutex> lk(config_mutex);
     ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
     auto new_id = [this, &channel]() {
         if (!free_channel_ids.empty()) {
             auto id = free_channel_ids.front();
             free_channel_ids.pop_front();
-            new (&channel_storage[id]) ChannelInfo(channel);
+            new (&channel_storage[id]) P(channel);
             return id;
         }
         channel_storage.emplace_back(channel);
@@ -27,11 +27,24 @@ void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& c
     if (current_channel_id != UNSET_CHANNEL) {
         channel_state = &channel_storage[current_channel_id];
     }
+    auto as_it = address_spaces.find(channel.memory_manager->GetID());
+    if (as_it != address_spaces.end()) {
+      as_it->second.ref_count++;
+      return;
+    }
+    AddresSpaceRef new_gpu_mem_ref{
+      .ref_count = 1,
+      .storage_id = address_spaces.size(),
+      .gpu_memory = channel.memory_manager.get(),
+    };
+    address_spaces.emplace(channel.memory_manager->GetID(), new_gpu_mem_ref);
+    OnGPUASRegister(channel.memory_manager->GetID());
 }
 
 /// Bind a channel for execution.
 template <class P>
 void ChannelSetupCaches<P>::BindToChannel(s32 id) {
+    std::unique_lock<std::mutex> lk(config_mutex);
     auto it = channel_map.find(id);
     ASSERT(it != channel_map.end() && id >= 0);
     current_channel_id = it->second;
@@ -39,11 +52,13 @@ void ChannelSetupCaches<P>::BindToChannel(s32 id) {
     maxwell3d = &channel_state->maxwell3d;
     kepler_compute = &channel_state->kepler_compute;
     gpu_memory = &channel_state->gpu_memory;
+    current_address_space = gpu_memory->GetID();
 }
 
 /// Erase channel's channel_state.
 template <class P>
 void ChannelSetupCaches<P>::EraseChannel(s32 id) {
+    std::unique_lock<std::mutex> lk(config_mutex);
     const auto it = channel_map.find(id);
     ASSERT(it != channel_map.end() && id >= 0);
     const auto this_id = it->second;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index fc68bcc73..d4c0dca78 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -16,9 +16,12 @@
 
 namespace Tegra {
 
+std::atomic<size_t> MemoryManager::unique_identifier_generator{};
+
 MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 page_bits_)
     : system{system_}, address_space_bits{address_space_bits_}, page_bits{page_bits_}, entries{},
-      page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits} {
+      page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits},
+      unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} {
     address_space_size = 1ULL << address_space_bits;
     allocate_start = address_space_bits > 32 ? 1ULL << 32 : 0;
     page_size = 1ULL << page_bits;
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b8878476a..56604ef3e 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <atomic>
 #include <map>
 #include <optional>
 #include <vector>
@@ -26,6 +27,10 @@ public:
                            u64 page_bits_ = 16);
     ~MemoryManager();
 
+    [[nodiscard]] size_t GetID() const noexcept { // This manager's ID, fixed at construction.
+        return unique_identifier;
+    }
+
     /// Binds a renderer to the memory manager.
     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
 
@@ -140,6 +145,10 @@ private:
     void SetEntry(size_t position, EntryType entry);
 
     Common::MultiLevelPageTable<u32> page_table;
+
+    const size_t unique_identifier;
+
+    static std::atomic<size_t> unique_identifier_generator;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
new file mode 100644
index 000000000..bc905a1a4
--- /dev/null
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -0,0 +1,16 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv3 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/control/channel_state_cache.inc"
+#include "video_core/texture_cache/texture_cache_base.h"
+
+namespace VideoCommon {
+
+TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept
+    : ChannelInfo(state), graphics_image_table{gpu_memory}, graphics_sampler_table{gpu_memory},
+      compute_image_table{gpu_memory}, compute_sampler_table{gpu_memory} {}
+
+template class VideoCommon::ChannelSetupCaches<VideoCommon::TextureCacheChannelInfo>;
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 0ec999d63..89c5faf88 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1,5 +1,7 @@
-// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 yuzu emulator team
+// (https://github.com/skyline-emu/)
+// SPDX-License-Identifier: GPL-3.0-or-later
+// Licensed under GPLv3 or any later version. Refer to the license.txt file included.
 
 #pragma once
 
@@ -41,10 +43,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
 
     // Setup channels
     current_channel_id = UNSET_CHANNEL;
-    state = nullptr;
-    maxwell3d = nullptr;
-    kepler_compute = nullptr;
-    gpu_memory = nullptr;
 
     // Make sure the first index is reserved for the null resources
     // This way the null resource becomes a compile time constant
@@ -156,23 +154,24 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
 template <class P>
 template <bool has_blacklists>
 void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
-    FillImageViews<has_blacklists>(state->graphics_image_table, state->graphics_image_view_ids,
-                                   views);
+    FillImageViews<has_blacklists>(channel_state->graphics_image_table,
+                                   channel_state->graphics_image_view_ids, views);
 }
 
 template <class P>
 void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
-    FillImageViews<true>(state->compute_image_table, state->compute_image_view_ids, views);
+    FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids,
+                         views);
 }
 
 template <class P>
 typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
-    if (index > state->graphics_sampler_table.Limit()) {
+    if (index > channel_state->graphics_sampler_table.Limit()) {
         LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
         return &slot_samplers[NULL_SAMPLER_ID];
     }
-    const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index);
-    SamplerId& id = state->graphics_sampler_ids[index];
+    const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index);
+    SamplerId& id = channel_state->graphics_sampler_ids[index];
     if (is_new) {
         id = FindSampler(descriptor);
     }
@@ -181,12 +180,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
 
 template <class P>
 typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
-    if (index > state->compute_sampler_table.Limit()) {
+    if (index > channel_state->compute_sampler_table.Limit()) {
         LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
         return &slot_samplers[NULL_SAMPLER_ID];
     }
-    const auto [descriptor, is_new] = state->compute_sampler_table.Read(index);
-    SamplerId& id = state->compute_sampler_ids[index];
+    const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index);
+    SamplerId& id = channel_state->compute_sampler_ids[index];
     if (is_new) {
         id = FindSampler(descriptor);
     }
@@ -199,11 +198,12 @@ void TextureCache<P>::SynchronizeGraphicsDescriptors() {
     const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex;
     const u32 tic_limit = maxwell3d->regs.tic.limit;
     const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit;
-    if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) {
-        state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    if (channel_state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(),
+                                                          tsc_limit)) {
+        channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
     }
-    if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
-        state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    if (channel_state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
+        channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
     }
 }
 
@@ -213,11 +213,12 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
     const u32 tic_limit = kepler_compute->regs.tic.limit;
     const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
     const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
-    if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
-        state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    if (channel_state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+        channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
     }
-    if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) {
-        state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    if (channel_state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(),
+                                                       tic_limit)) {
+        channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
     }
 }
 
@@ -738,7 +739,7 @@ ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
     if (!IsValidEntry(*gpu_memory, config)) {
         return NULL_IMAGE_VIEW_ID;
     }
-    const auto [pair, is_new] = state->image_views.try_emplace(config);
+    const auto [pair, is_new] = channel_state->image_views.try_emplace(config);
     ImageViewId& image_view_id = pair->second;
     if (is_new) {
         image_view_id = CreateImageView(config);
@@ -1198,7 +1199,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
     if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
         return NULL_SAMPLER_ID;
     }
-    const auto [pair, is_new] = state->samplers.try_emplace(config);
+    const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
     if (is_new) {
         pair->second = slot_samplers.insert(runtime, config);
     }
@@ -1327,8 +1328,8 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 8> images;
     ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
-        const auto it = state->gpu_page_table.find(page);
-        if (it == state->gpu_page_table.end()) {
+        const auto it = channel_state->gpu_page_table->find(page);
+        if (it == channel_state->gpu_page_table->end()) {
             if constexpr (BOOL_BREAK) {
                 return false;
             } else {
@@ -1454,8 +1455,9 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
     }
     image.lru_index = lru_cache.Insert(image_id, frame_tick);
 
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); });
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+        (*channel_state->gpu_page_table)[page].push_back(image_id);
+    });
     if (False(image.flags & ImageFlagBits::Sparse)) {
         auto map_id =
             slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
@@ -1486,9 +1488,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
     image.flags &= ~ImageFlagBits::BadOverlap;
     lru_cache.Free(image.lru_index);
     const auto& clear_page_table =
-        [this, image_id](
-            u64 page,
-            std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
+        [this, image_id](u64 page,
+                         std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>&
+                             selected_page_table) {
             const auto page_it = selected_page_table.find(page);
             if (page_it == selected_page_table.end()) {
                 ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << YUZU_PAGEBITS);
@@ -1504,7 +1506,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
             image_ids.erase(vector_it);
         };
     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
-        clear_page_table(page, state->gpu_page_table);
+        clear_page_table(page, (*channel_state->gpu_page_table));
     });
     if (False(image.flags & ImageFlagBits::Sparse)) {
         const auto map_id = image.map_view_id;
@@ -1701,11 +1703,11 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
 
 template <class P>
 void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
-    auto it = state->image_views.begin();
-    while (it != state->image_views.end()) {
+    auto it = channel_state->image_views.begin();
+    while (it != channel_state->image_views.end()) {
         const auto found = std::ranges::find(removed_views, it->second);
         if (found != removed_views.end()) {
-            it = state->image_views.erase(it);
+            it = channel_state->image_views.erase(it);
         } else {
             ++it;
         }
@@ -1967,61 +1969,19 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
            scissor.max_y >= size.height;
 }
 
-template <class P>
-TextureCache<P>::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept
-    : maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute},
-      gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory},
-      graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{
-                                                                               gpu_memory} {}
-
 template <class P>
 void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
-    ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
-    auto new_id = [this, &channel]() {
-        if (!free_channel_ids.empty()) {
-            auto id = free_channel_ids.front();
-            free_channel_ids.pop_front();
-            new (&channel_storage[id]) ChannelInfo(channel);
-            return id;
-        }
-        channel_storage.emplace_back(channel);
-        return channel_storage.size() - 1;
-    }();
-    channel_map.emplace(channel.bind_id, new_id);
-    if (current_channel_id != UNSET_CHANNEL) {
-        state = &channel_storage[current_channel_id];
-    }
+    VideoCommon::ChannelSetupCaches<TextureCacheChannelInfo>::CreateChannel(channel);
+    const auto it = channel_map.find(channel.bind_id);
+    auto* this_state = &channel_storage[it->second];
+    const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()];
+    this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id];
 }
 
 /// Bind a channel for execution.
 template <class P>
-void TextureCache<P>::BindToChannel(s32 id) {
-    auto it = channel_map.find(id);
-    ASSERT(it != channel_map.end() && id >= 0);
-    current_channel_id = it->second;
-    state = &channel_storage[current_channel_id];
-    maxwell3d = &state->maxwell3d;
-    kepler_compute = &state->kepler_compute;
-    gpu_memory = &state->gpu_memory;
-}
-
-/// Erase channel's state.
-template <class P>
-void TextureCache<P>::EraseChannel(s32 id) {
-    const auto it = channel_map.find(id);
-    ASSERT(it != channel_map.end() && id >= 0);
-    const auto this_id = it->second;
-    free_channel_ids.push_back(this_id);
-    channel_map.erase(it);
-    if (this_id == current_channel_id) {
-        current_channel_id = UNSET_CHANNEL;
-        state = nullptr;
-        maxwell3d = nullptr;
-        kepler_compute = nullptr;
-        gpu_memory = nullptr;
-    } else if (current_channel_id != UNSET_CHANNEL) {
-        state = &channel_storage[current_channel_id];
-    }
+void TextureCache<P>::OnGPUASRegister([[maybe_unused]] size_t map_id) {
+    gpu_page_table_storage.emplace_back();
 }
 
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 69efcb718..b24968b03 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -1,5 +1,7 @@
-// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 yuzu emulator team
+// (https://github.com/skyline-emu/)
+// SPDX-License-Identifier: GPL-3.0-or-later
+// Licensed under GPLv3 or any later version. Refer to the license.txt file included.
 
 #pragma once
 
@@ -13,9 +15,11 @@
 #include <queue>
 
 #include "common/common_types.h"
+#include "common/hash.h"
 #include "common/literals.h"
 #include "common/lru_cache.h"
 #include "video_core/compatible_formats.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/surface.h"
@@ -50,8 +54,35 @@ struct ImageViewInOut {
     ImageViewId id{};
 };
 
+using TextureCacheGPUMap = std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>;
+
+class TextureCacheChannelInfo : public ChannelInfo { // Texture cache state kept per channel.
+public:
+    TextureCacheChannelInfo() = delete;
+    TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept;
+    TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete;
+    TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete;
+    TextureCacheChannelInfo(TextureCacheChannelInfo&& other) noexcept = default;
+    TextureCacheChannelInfo& operator=(TextureCacheChannelInfo&& other) noexcept = default;
+
+    DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
+    DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
+    std::vector<SamplerId> graphics_sampler_ids;      // resized to tsc_limit + 1 on sync
+    std::vector<ImageViewId> graphics_image_view_ids; // resized to tic_limit + 1 on sync
+
+    DescriptorTable<TICEntry> compute_image_table{gpu_memory};
+    DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
+    std::vector<SamplerId> compute_sampler_ids;      // resized to tsc_limit + 1 on sync
+    std::vector<ImageViewId> compute_image_view_ids; // resized to tic_limit + 1 on sync
+
+    std::unordered_map<TICEntry, ImageViewId> image_views;
+    std::unordered_map<TSCEntry, SamplerId> samplers;
+
+    TextureCacheGPUMap* gpu_page_table{}; // Non-owning; null until TextureCache::CreateChannel.
+};
+
 template <class P>
-class TextureCache {
+class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelInfo> {
     /// Address shift for caching images into a hash table
     static constexpr u64 YUZU_PAGEBITS = 20;
 
@@ -85,13 +116,6 @@ class TextureCache {
         PixelFormat src_format;
     };
 
-    template <typename T>
-    struct IdentityHash {
-        [[nodiscard]] size_t operator()(T value) const noexcept {
-            return static_cast<size_t>(value);
-        }
-    };
-
 public:
     explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
 
@@ -179,13 +203,7 @@ public:
     [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
 
     /// Create channel state.
-    void CreateChannel(struct Tegra::Control::ChannelState& channel);
-
-    /// Bind a channel for execution.
-    void BindToChannel(s32 id);
-
-    /// Erase channel's state.
-    void EraseChannel(s32 id);
+    void CreateChannel(Tegra::Control::ChannelState& channel) final override;
 
     std::mutex mutex;
 
@@ -221,6 +239,8 @@ private:
         }
     }
 
+    void OnGPUASRegister(size_t map_id) final override;
+
     /// Runs the Garbage Collector.
     void RunGarbageCollector();
 
@@ -355,51 +375,15 @@ private:
 
     Runtime& runtime;
 
-    struct ChannelInfo {
-        ChannelInfo() = delete;
-        ChannelInfo(struct Tegra::Control::ChannelState& state) noexcept;
-        ChannelInfo(const ChannelInfo& state) = delete;
-        ChannelInfo& operator=(const ChannelInfo&) = delete;
-        ChannelInfo(ChannelInfo&& other) noexcept = default;
-        ChannelInfo& operator=(ChannelInfo&& other) noexcept = default;
-
-        Tegra::Engines::Maxwell3D& maxwell3d;
-        Tegra::Engines::KeplerCompute& kepler_compute;
-        Tegra::MemoryManager& gpu_memory;
-
-        DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
-        DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
-        std::vector<SamplerId> graphics_sampler_ids;
-        std::vector<ImageViewId> graphics_image_view_ids;
-
-        DescriptorTable<TICEntry> compute_image_table{gpu_memory};
-        DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
-        std::vector<SamplerId> compute_sampler_ids;
-        std::vector<ImageViewId> compute_image_view_ids;
-
-        std::unordered_map<TICEntry, ImageViewId> image_views;
-        std::unordered_map<TSCEntry, SamplerId> samplers;
-
-        std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
-    };
-
-    std::deque<ChannelInfo> channel_storage;
-    std::deque<size_t> free_channel_ids;
-    std::unordered_map<s32, size_t> channel_map;
-
-    ChannelInfo* state;
-    size_t current_channel_id{UNSET_CHANNEL};
     VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D* maxwell3d;
-    Tegra::Engines::KeplerCompute* kepler_compute;
-    Tegra::MemoryManager* gpu_memory;
+    std::deque<TextureCacheGPUMap> gpu_page_table_storage;
 
     RenderTargets render_targets;
 
     std::unordered_map<RenderTargets, FramebufferId> framebuffers;
 
-    std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
-    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
+    std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
+    std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
     std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
 
     VAddr virtual_invalid_space{};
-- 
cgit v1.2.3


From 4d60410dd979fb688de7735d2b4b25a557bdeac7 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 5 Feb 2022 18:15:26 +0100
Subject: MemoryManager: initial multi paging system implementation.

---
 src/common/multi_level_page_table.inc              |   3 +
 .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp    |  45 ++-
 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h |   1 +
 src/core/hle/service/nvdrv/devices/nvmap.cpp       |  10 +
 src/video_core/memory_manager.cpp                  | 436 +++++++++++++--------
 src/video_core/memory_manager.h                    |  57 ++-
 6 files changed, 343 insertions(+), 209 deletions(-)

(limited to 'src/common')

diff --git a/src/common/multi_level_page_table.inc b/src/common/multi_level_page_table.inc
index 7fbcb908a..9a68cad93 100644
--- a/src/common/multi_level_page_table.inc
+++ b/src/common/multi_level_page_table.inc
@@ -19,6 +19,9 @@ MultiLevelPageTable<BaseAddr>::MultiLevelPageTable(std::size_t address_space_bit
                                                    std::size_t page_bits_)
     : address_space_bits{address_space_bits_},
       first_level_bits{first_level_bits_}, page_bits{page_bits_} {
+    if (page_bits == 0) {
+      return;
+    }
     first_level_shift = address_space_bits - first_level_bits;
     first_level_chunk_size = (1ULL << (first_level_shift - page_bits)) * sizeof(BaseAddr);
     alloc_size = (1ULL << (address_space_bits - page_bits)) * sizeof(BaseAddr);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index db2a6c3b2..d95a88393 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -133,7 +133,8 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
     const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits};
     vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages);
 
-    gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, VM::PAGE_SIZE_BITS);
+    gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, vm.big_page_size_bits,
+                                                  VM::PAGE_SIZE_BITS);
     system.GPU().InitAddressSpace(*gmmu);
     vm.initialised = true;
 
@@ -189,6 +190,7 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
         .size = size,
         .page_size = params.page_size,
         .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None,
+        .big_pages = params.page_size != VM::YUZU_PAGESIZE,
     };
 
     std::memcpy(output.data(), &params, output.size());
@@ -209,7 +211,7 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
     // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
     // Only FreeSpace can unmap them fully
     if (mapping->sparse_alloc)
-        gmmu->MapSparse(offset, mapping->size);
+        gmmu->MapSparse(offset, mapping->size, mapping->big_page);
     else
         gmmu->Unmap(offset, mapping->size);
 
@@ -294,8 +296,9 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
             return NvResult::BadValue;
         }
 
+        const bool use_big_pages = alloc->second.big_pages;
         if (!entry.handle) {
-            gmmu->MapSparse(virtual_address, size);
+            gmmu->MapSparse(virtual_address, size, use_big_pages);
         } else {
             auto handle{nvmap.GetHandle(entry.handle)};
             if (!handle) {
@@ -306,7 +309,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
                 handle->address +
                 (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
 
-            gmmu->Map(virtual_address, cpu_address, size);
+            gmmu->Map(virtual_address, cpu_address, size, use_big_pages);
         }
     }
 
@@ -345,7 +348,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
             u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
             VAddr cpu_address{mapping->ptr + params.buffer_offset};
 
-            gmmu->Map(gpu_address, cpu_address, params.mapping_size);
+            gmmu->Map(gpu_address, cpu_address, params.mapping_size, mapping->big_page);
 
             return NvResult::Success;
         } catch ([[maybe_unused]] const std::out_of_range& e) {
@@ -363,6 +366,17 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
     VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)};
     u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};
 
+    bool big_page{[&]() {
+        if (Common::IsAligned(handle->align, vm.big_page_size))
+            return true;
+        else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE))
+            return false;
+        else {
+            UNREACHABLE();
+            return false;
+        }
+    }()};
+
     if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
         auto alloc{allocation_map.upper_bound(params.offset)};
 
@@ -372,23 +386,14 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
             return NvResult::BadValue;
         }
 
-        gmmu->Map(params.offset, cpu_address, size);
+        const bool use_big_pages = alloc->second.big_pages && big_page;
+        gmmu->Map(params.offset, cpu_address, size, use_big_pages);
 
-        auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, false,
-                                               alloc->second.sparse)};
+        auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true,
+                                               use_big_pages, alloc->second.sparse)};
         alloc->second.mappings.push_back(mapping);
         mapping_map[params.offset] = mapping;
     } else {
-        bool big_page{[&]() {
-            if (Common::IsAligned(handle->align, vm.big_page_size))
-                return true;
-            else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE))
-                return false;
-            else {
-                UNREACHABLE();
-                return false;
-            }
-        }()};
 
         auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
         u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
@@ -402,7 +407,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
             return NvResult::InsufficientMemory;
         }
 
-        gmmu->Map(params.offset, cpu_address, size);
+        gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), big_page);
 
         auto mapping{
             std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
@@ -439,7 +444,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
         // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
         // Only FreeSpace can unmap them fully
         if (mapping->sparse_alloc) {
-            gmmu->MapSparse(params.offset, mapping->size);
+            gmmu->MapSparse(params.offset, mapping->size, mapping->big_page);
         } else {
             gmmu->Unmap(params.offset, mapping->size);
         }
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 1d27739e2..12e881f0d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -177,6 +177,7 @@ private:
         std::list<std::shared_ptr<Mapping>> mappings;
         u32 page_size;
         bool sparse;
+        bool big_pages;
     };
 
     std::map<u64, std::shared_ptr<Mapping>>
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 279997e81..992c117f1 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -9,6 +9,8 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/hle/kernel/k_page_table.h"
+#include "core/hle/kernel/k_process.h"
 #include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"
@@ -136,6 +138,10 @@ NvResult nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output)
         LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle);
         return result;
     }
+    ASSERT(system.CurrentProcess()
+               ->PageTable()
+               .LockForDeviceAddressSpace(handle_description->address, handle_description->size)
+               .IsSuccess());
     std::memcpy(output.data(), &params, sizeof(params));
     return result;
 }
@@ -256,6 +262,10 @@ NvResult nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
     }
 
     if (auto freeInfo{file.FreeHandle(params.handle, false)}) {
+        ASSERT(system.CurrentProcess()
+                   ->PageTable()
+                   .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size)
+                   .IsSuccess());
         params.address = freeInfo->address;
         params.size = static_cast<u32>(freeInfo->size);
         params.flags.raw = 0;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index b36067613..836ece136 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -7,6 +7,7 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/device_memory.h"
 #include "core/hle/kernel/k_page_table.h"
 #include "core/hle/kernel/k_process.h"
 #include "core/memory.h"
@@ -14,40 +15,67 @@
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
 
 namespace Tegra {
 
 std::atomic<size_t> MemoryManager::unique_identifier_generator{};
 
-MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 page_bits_)
-    : system{system_}, address_space_bits{address_space_bits_}, page_bits{page_bits_}, entries{},
-      page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits},
+MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
+                             u64 page_bits_)
+    : system{system_}, memory{system.Memory()}, device_memory{system.DeviceMemory()},
+      address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
+      entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
+                                           page_bits != big_page_bits ? page_bits : 0},
       unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} {
     address_space_size = 1ULL << address_space_bits;
-    allocate_start = address_space_bits > 32 ? 1ULL << 32 : 0;
     page_size = 1ULL << page_bits;
     page_mask = page_size - 1ULL;
-    const u64 page_table_bits = address_space_bits - cpu_page_bits;
+    big_page_size = 1ULL << big_page_bits;
+    big_page_mask = big_page_size - 1ULL;
+    const u64 page_table_bits = address_space_bits - page_bits;
+    const u64 big_page_table_bits = address_space_bits - big_page_bits;
     const u64 page_table_size = 1ULL << page_table_bits;
+    const u64 big_page_table_size = 1ULL << big_page_table_bits;
     page_table_mask = page_table_size - 1;
+    big_page_table_mask = big_page_table_size - 1;
 
+    big_entries.resize(big_page_table_size / 32, 0);
+    big_page_table_cpu.resize(big_page_table_size);
+    big_page_table_physical.resize(big_page_table_size);
     entries.resize(page_table_size / 32, 0);
 }
 
 MemoryManager::~MemoryManager() = default;
 
+template <bool is_big_page>
 MemoryManager::EntryType MemoryManager::GetEntry(size_t position) const {
-    position = position >> page_bits;
-    const u64 entry_mask = entries[position / 32];
-    const size_t sub_index = position % 32;
-    return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
+    if constexpr (is_big_page) {
+        position = position >> big_page_bits;
+        const u64 entry_mask = big_entries[position / 32];
+        const size_t sub_index = position % 32;
+        return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
+    } else {
+        position = position >> page_bits;
+        const u64 entry_mask = entries[position / 32];
+        const size_t sub_index = position % 32;
+        return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
+    }
 }
 
+template <bool is_big_page>
 void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
-    position = position >> page_bits;
-    const u64 entry_mask = entries[position / 32];
-    const size_t sub_index = position % 32;
-    entries[position / 32] =
-        (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
+    if constexpr (is_big_page) {
+        position = position >> big_page_bits;
+        const u64 entry_mask = big_entries[position / 32];
+        const size_t sub_index = position % 32;
+        big_entries[position / 32] =
+            (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
+    } else {
+        position = position >> page_bits;
+        const u64 entry_mask = entries[position / 32];
+        const size_t sub_index = position % 32;
+        entries[position / 32] =
+            (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
+    }
 }
 
 template <MemoryManager::EntryType entry_type>
@@ -59,48 +89,66 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
     }
     for (u64 offset{}; offset < size; offset += page_size) {
         const GPUVAddr current_gpu_addr = gpu_addr + offset;
-        [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr);
-        SetEntry(current_gpu_addr, entry_type);
+        [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
+        SetEntry<false>(current_gpu_addr, entry_type);
         if (current_entry_type != entry_type) {
             rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
         }
         if constexpr (entry_type == EntryType::Mapped) {
             const VAddr current_cpu_addr = cpu_addr + offset;
-            const auto index = PageEntryIndex(current_gpu_addr);
-            const u32 sub_value = static_cast<u32>(current_cpu_addr >> 12ULL);
-            if (current_entry_type == entry_type && sub_value != page_table[index]) {
-                rasterizer->InvalidateRegion(static_cast<VAddr>(page_table[index]) << 12ULL,
-                                             page_size);
-            }
-            page_table[index] = static_cast<u32>(current_cpu_addr >> 12ULL);
+            const auto index = PageEntryIndex<false>(current_gpu_addr);
+            const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
+            page_table[index] = sub_value;
         }
         remaining_size -= page_size;
     }
     return gpu_addr;
 }
 
+template <MemoryManager::EntryType entry_type> // sets entry_type on each big page of the range
+GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
+                                       size_t size) {
+    u64 remaining_size{size}; // NOTE(review): decremented below but never read
+    for (u64 offset{}; offset < size; offset += big_page_size) {
+        const GPUVAddr current_gpu_addr = gpu_addr + offset;
+        [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
+        SetEntry<true>(current_gpu_addr, entry_type);
+        if (current_entry_type != entry_type) { // report the page that changed, not the range base
+            rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size);
+        }
+        if constexpr (entry_type == EntryType::Mapped) { // only mappings record backing addresses
+            const VAddr current_cpu_addr = cpu_addr + offset;
+            const auto index = PageEntryIndex<true>(current_gpu_addr);
+            const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
+            big_page_table_cpu[index] = sub_value; // CPU address stored in cpu_page_bits units
+            const PAddr phys_address = // NOTE(review): stored below unshifted as u32; confirm
+                device_memory.GetPhysicalAddr(memory.GetPointer(current_cpu_addr));
+            big_page_table_physical[index] = static_cast<u32>(phys_address);
+        }
+        remaining_size -= big_page_size;
+    }
+    return gpu_addr; // the input base address is returned unchanged
+}
+
 void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
     rasterizer = rasterizer_;
 }
 
-GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
+GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
+                            bool is_big_pages) { // selects which table records the mapping
+    if (is_big_pages) [[likely]] { // big-page table path
+        return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
+    }
     return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
 }
 
-GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size) {
+GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
+    if (is_big_pages) [[likely]] { // marks big pages Reserved; cpu_addr is passed as 0
+        return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
+    }
     return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
 }
 
-GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) {
-    return Map(*FindFreeRange(size, align), cpu_addr, size);
-}
-
-GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) {
-    const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true);
-    ASSERT(gpu_addr);
-    return Map(*gpu_addr, cpu_addr, size);
-}
-
 void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     if (size == 0) {
         return;
@@ -115,61 +163,24 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
         rasterizer->UnmapMemory(*cpu_addr, map_size);
     }
 
+    BigPageTableOp<EntryType::Free>(gpu_addr, 0, size);
     PageTableOp<EntryType::Free>(gpu_addr, 0, size);
 }
 
-std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) {
-    for (u64 offset{}; offset < size; offset += page_size) {
-        if (GetEntry(gpu_addr + offset) != EntryType::Free) {
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
+    if (GetEntry<true>(gpu_addr) != EntryType::Mapped) [[unlikely]] {
+        if (GetEntry<false>(gpu_addr) != EntryType::Mapped) {
             return std::nullopt;
         }
-    }
 
-    return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
-}
-
-GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) {
-    return *AllocateFixed(*FindFreeRange(size, align), size);
-}
-
-std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
-                                                     bool start_32bit_address) const {
-    if (!align) {
-        align = page_size;
-    } else {
-        align = Common::AlignUp(align, page_size);
-    }
-
-    u64 available_size{};
-    GPUVAddr gpu_addr{start_32bit_address ? 0 : allocate_start};
-    while (gpu_addr + available_size < address_space_size) {
-        if (GetEntry(gpu_addr + available_size) == EntryType::Free) {
-            available_size += page_size;
-
-            if (available_size >= size) {
-                return gpu_addr;
-            }
-        } else {
-            gpu_addr += available_size + page_size;
-            available_size = 0;
-
-            const auto remainder{gpu_addr % align};
-            if (remainder) {
-                gpu_addr = (gpu_addr - remainder) + align;
-            }
-        }
-    }
-
-    return std::nullopt;
-}
-
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
-    if (GetEntry(gpu_addr) != EntryType::Mapped) {
-        return std::nullopt;
+        const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex<false>(gpu_addr)])
+                                    << cpu_page_bits;
+        return cpu_addr_base + (gpu_addr & page_mask);
     }
 
-    const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex(gpu_addr)]) << 12ULL;
-    return cpu_addr_base + (gpu_addr & page_mask);
+    const VAddr cpu_addr_base =
+        static_cast<VAddr>(big_page_table_cpu[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits;
+    return cpu_addr_base + (gpu_addr & big_page_mask);
 }
 
 std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
@@ -225,7 +236,7 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) {
         return {};
     }
 
-    return system.Memory().GetPointer(*address);
+    return memory.GetPointer(*address);
 }
 
 const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
@@ -234,98 +245,161 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
         return {};
     }
 
-    return system.Memory().GetPointer(*address);
+    return memory.GetPointer(*address);
 }
 
-void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
-                                  bool is_safe) const {
+#pragma inline_recursion(on)
+
+template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
+inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size,
+                                           FuncMapped&& func_mapped, FuncReserved&& func_reserved,
+                                           FuncUnmapped&& func_unmapped) const {
+    u64 used_page_size; // geometry of the page level this walk uses (big or regular)
+    u64 used_page_mask;
+    u64 used_page_bits;
+    if constexpr (is_big_pages) { // walk the range in big-page steps
+        used_page_size = big_page_size;
+        used_page_mask = big_page_mask;
+        used_page_bits = big_page_bits;
+    } else { // walk the range in regular-page steps
+        used_page_size = page_size;
+        used_page_mask = page_mask;
+        used_page_bits = page_bits;
+    }
     std::size_t remaining_size{size};
-    std::size_t page_index{gpu_src_addr >> page_bits};
-    std::size_t page_offset{gpu_src_addr & page_mask};
+    std::size_t page_index{gpu_src_addr >> used_page_bits};
+    std::size_t page_offset{gpu_src_addr & used_page_mask}; // non-zero only for the first page
+    GPUVAddr current_address = gpu_src_addr; // address used for the entry-state lookup
 
     while (remaining_size > 0) {
         const std::size_t copy_amount{
-            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
-        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (page_addr) {
-            const auto src_addr{*page_addr + page_offset};
-            if (is_safe) {
-                // Flush must happen on the rasterizer interface, such that memory is always
-                // synchronous when it is read (even when in asynchronous GPU mode).
-                // Fixes Dead Cells title menu.
-                rasterizer->FlushRegion(src_addr, copy_amount);
-            }
-            system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
-        } else {
-            std::memset(dest_buffer, 0, copy_amount);
+            std::min(static_cast<std::size_t>(used_page_size) - page_offset, remaining_size)};
+        auto entry = GetEntry<is_big_pages>(current_address); // state of the current page
+        if (entry == EntryType::Mapped) [[likely]] {
+            func_mapped(page_index, page_offset, copy_amount); // (page, offset in page, bytes)
+        } else if (entry == EntryType::Reserved) {
+            func_reserved(page_index, page_offset, copy_amount);
+        } else [[unlikely]] { // any other entry state
+            func_unmapped(page_index, page_offset, copy_amount);
         }
-
         page_index++;
         page_offset = 0;
-        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
         remaining_size -= copy_amount;
+        current_address += copy_amount; // callbacks advance their own buffers
     }
 }
 
+template <bool is_safe> // is_safe: flush the rasterizer over each span before reading it
+void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer,
+                                  std::size_t size) const {
+    auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, // zero-fill, then advance
+                           [[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
+        std::memset(dest_buffer, 0, copy_amount);
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+    };
+    auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base = // CPU address from the regular-page table, plus offset
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        if constexpr (is_safe) {
+            rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+        }
+        memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+    };
+    auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base = // CPU address from the big-page table, plus offset
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        if constexpr (is_safe) {
+            rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+        }
+        memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
+        // u8* physical = device_memory.GetPointer(big_page_table_physical[page_index] + offset);
+        // std::memcpy(dest_buffer, physical, copy_amount);
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+    };
+    auto read_short_pages = [&](std::size_t page_index, std::size_t offset,
+                                std::size_t copy_amount) { // redo this span with regular pages
+        GPUVAddr base = (page_index << big_page_bits) + offset; // GPU address of the span
+        MemoryOperation<false>(base, copy_amount, mapped_normal, set_to_zero, set_to_zero);
+    };
+    MemoryOperation<true>(gpu_src_addr, size, mapped_big, set_to_zero, read_short_pages);
+}
+
 void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const {
-    ReadBlockImpl(gpu_src_addr, dest_buffer, size, true);
+    ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size);
 }
 
 void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
                                     const std::size_t size) const {
-    ReadBlockImpl(gpu_src_addr, dest_buffer, size, false);
+    ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size);
 }
 
-void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
-                                   bool is_safe) {
-    std::size_t remaining_size{size};
-    std::size_t page_index{gpu_dest_addr >> page_bits};
-    std::size_t page_offset{gpu_dest_addr & page_mask};
-
-    while (remaining_size > 0) {
-        const std::size_t copy_amount{
-            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
-        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (page_addr) {
-            const auto dest_addr{*page_addr + page_offset};
-
-            if (is_safe) {
-                // Invalidate must happen on the rasterizer interface, such that memory is always
-                // synchronous when it is written (even when in asynchronous GPU mode).
-                rasterizer->InvalidateRegion(dest_addr, copy_amount);
-            }
-            system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
+template <bool is_safe>
+void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer,
+                                   std::size_t size) {
+    auto just_advance = [&]([[maybe_unused]] std::size_t page_index,
+                            [[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
+        src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+    };
+    auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        if constexpr (is_safe) {
+            rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
         }
-
-        page_index++;
-        page_offset = 0;
+        memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
-        remaining_size -= copy_amount;
-    }
+    };
+    auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        if constexpr (is_safe) {
+            rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
+        }
+        memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
+        /*u8* physical =
+            device_memory.GetPointer(big_page_table_physical[page_index] << cpu_page_bits) + offset;
+        std::memcpy(physical, src_buffer, copy_amount);*/
+        src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+    };
+    auto write_short_pages = [&](std::size_t page_index, std::size_t offset,
+                                 std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset;
+        MemoryOperation<false>(base, copy_amount, mapped_normal, just_advance, just_advance);
+    };
+    MemoryOperation<true>(gpu_dest_addr, size, mapped_big, just_advance, write_short_pages);
 }
 
 void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) {
-    WriteBlockImpl(gpu_dest_addr, src_buffer, size, true);
+    WriteBlockImpl<true>(gpu_dest_addr, src_buffer, size);
 }
 
 void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
                                      std::size_t size) {
-    WriteBlockImpl(gpu_dest_addr, src_buffer, size, false);
+    WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size);
 }
 
 void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
-    size_t remaining_size{size};
-    size_t page_index{gpu_addr >> page_bits};
-    size_t page_offset{gpu_addr & page_mask};
-    while (remaining_size > 0) {
-        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
-        if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
-            rasterizer->FlushRegion(*page_addr + page_offset, num_bytes);
-        }
-        ++page_index;
-        page_offset = 0;
-        remaining_size -= num_bytes;
-    }
+    auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, // no-op callback
+                          [[maybe_unused]] std::size_t offset,
+                          [[maybe_unused]] std::size_t copy_amount) {};
+
+    auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base = // CPU address from the regular-page table, plus offset
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+    };
+    auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base = // CPU address from the big-page table, plus offset
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+    };
+    auto flush_short_pages = [&](std::size_t page_index, std::size_t offset,
+                                 std::size_t copy_amount) { // redo this span with regular pages
+        GPUVAddr base = (page_index << big_page_bits) + offset; // GPU address of the span
+        MemoryOperation<false>(base, copy_amount, mapped_normal, do_nothing, do_nothing);
+    };
+    MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, flush_short_pages);
 }
 
 void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) {
@@ -348,7 +422,7 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
 }
 
 bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
-    size_t page_index{gpu_addr >> page_bits};
+    size_t page_index{gpu_addr >> page_bits}; // must use the same page units as page_last
     const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
     std::optional<VAddr> old_page_addr{};
     while (page_index != page_last) {
@@ -371,7 +445,7 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
     size_t page_index{gpu_addr >> page_bits};
     const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
     while (page_index < page_last) {
-        if (GetEntry(page_index << page_bits) == EntryType::Free) {
+        if (GetEntry<false>(page_index << page_bits) == EntryType::Free) {
             return false;
         }
         ++page_index;
@@ -379,47 +453,63 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
     return true;
 }
 
+#pragma inline_recursion(on)
+
 std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
     GPUVAddr gpu_addr, std::size_t size) const {
     std::vector<std::pair<GPUVAddr, std::size_t>> result{};
-    size_t page_index{gpu_addr >> page_bits};
-    size_t remaining_size{size};
-    size_t page_offset{gpu_addr & page_mask};
     std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
     std::optional<VAddr> old_page_addr{};
-    const auto extend_size = [this, &last_segment, &page_index, &page_offset](std::size_t bytes) {
-        if (!last_segment) {
-            const GPUVAddr new_base_addr = (page_index << page_bits) + page_offset;
-            last_segment = {new_base_addr, bytes};
-        } else {
-            last_segment->second += bytes;
-        }
-    };
-    const auto split = [&last_segment, &result] {
+    const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
+                                                [[maybe_unused]] std::size_t offset,
+                                                [[maybe_unused]] std::size_t copy_amount) {
         if (last_segment) {
             result.push_back(*last_segment);
             last_segment = std::nullopt;
         }
     };
-    while (remaining_size > 0) {
-        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
-        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (!page_addr || *page_addr == 0) {
-            split();
-        } else if (old_page_addr) {
-            if (*old_page_addr + page_size != *page_addr) {
-                split();
+    const auto extend_size_big = [this, &split, &old_page_addr,
+                                  &last_segment](std::size_t page_index, std::size_t offset,
+                                                 std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        if (old_page_addr) {
+            if (*old_page_addr != cpu_addr_base) {
+                split(0, 0, 0);
             }
-            extend_size(num_bytes);
+        }
+        old_page_addr = {cpu_addr_base + copy_amount};
+        if (!last_segment) {
+            const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
+            last_segment = {new_base_addr, copy_amount};
         } else {
-            extend_size(num_bytes);
+            last_segment->second += copy_amount;
         }
-        ++page_index;
-        page_offset = 0;
-        remaining_size -= num_bytes;
-        old_page_addr = page_addr;
-    }
-    split();
+    };
+    const auto extend_size_short = [this, &split, &old_page_addr,
+                                    &last_segment](std::size_t page_index, std::size_t offset,
+                                                   std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        if (old_page_addr) {
+            if (*old_page_addr != cpu_addr_base) {
+                split(0, 0, 0);
+            }
+        }
+        old_page_addr = {cpu_addr_base + copy_amount};
+        if (!last_segment) {
+            const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
+            last_segment = {new_base_addr, copy_amount};
+        } else {
+            last_segment->second += copy_amount;
+        }
+    };
+    auto do_short_pages = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset;
+        MemoryOperation<false>(base, copy_amount, extend_size_short, split, split);
+    };
+    MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
+    split(0, 0, 0);
     return result;
 }
 
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 56604ef3e..9c388a06e 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -10,21 +10,26 @@
 
 #include "common/common_types.h"
 #include "common/multi_level_page_table.h"
+#include "common/virtual_buffer.h"
 
 namespace VideoCore {
 class RasterizerInterface;
 }
 
 namespace Core {
+class DeviceMemory;
+namespace Memory {
+class Memory;
+} // namespace Memory
 class System;
-}
+} // namespace Core
 
 namespace Tegra {
 
 class MemoryManager final {
 public:
     explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40,
-                           u64 page_bits_ = 16);
+                           u64 big_page_bits_ = 16, u64 page_bits_ = 12);
     ~MemoryManager();
 
     size_t GetID() const {
@@ -93,12 +98,8 @@ public:
     std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
                                                                     std::size_t size) const;
 
-    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size);
-    GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size);
-    [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
-    [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
-    [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
-    [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
+    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, bool is_big_pages = true);
+    GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
     void Unmap(GPUVAddr gpu_addr, std::size_t size);
 
     void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
@@ -107,26 +108,42 @@ private:
     [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
                                                         bool start_32bit_address = false) const;
 
-    void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
-                       bool is_safe) const;
-    void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
-                        bool is_safe);
+    template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
+    inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
+                                FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
+
+    template <bool is_safe>
+    void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+
+    template <bool is_safe>
+    void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
 
+    template <bool is_big_page> // true: index for the big-page tables; false: for page_table
     [[nodiscard]] inline std::size_t PageEntryIndex(GPUVAddr gpu_addr) const {
-        return (gpu_addr >> page_bits) & page_table_mask;
+        if constexpr (is_big_page) {
+            return (gpu_addr >> big_page_bits) & big_page_table_mask; // big-page number, masked
+        } else {
+            return (gpu_addr >> page_bits) & page_table_mask; // regular-page number, masked
+        }
     }
 
     Core::System& system;
+    Core::Memory::Memory& memory;
+    Core::DeviceMemory& device_memory;
 
     const u64 address_space_bits;
     const u64 page_bits;
     u64 address_space_size;
-    u64 allocate_start;
     u64 page_size;
     u64 page_mask;
     u64 page_table_mask;
     static constexpr u64 cpu_page_bits{12};
 
+    const u64 big_page_bits;
+    u64 big_page_size;
+    u64 big_page_mask;
+    u64 big_page_table_mask;
+
     VideoCore::RasterizerInterface* rasterizer = nullptr;
 
     enum class EntryType : u64 {
@@ -136,15 +153,23 @@ private:
     };
 
     std::vector<u64> entries;
+    std::vector<u64> big_entries;
 
     template <EntryType entry_type>
     GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
 
-    EntryType GetEntry(size_t position) const;
+    template <EntryType entry_type>
+    GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+
+    template <bool is_big_page>
+    inline EntryType GetEntry(size_t position) const;
 
-    void SetEntry(size_t position, EntryType entry);
+    template <bool is_big_page>
+    inline void SetEntry(size_t position, EntryType entry);
 
     Common::MultiLevelPageTable<u32> page_table;
+    Common::VirtualBuffer<u32> big_page_table_cpu;
+    Common::VirtualBuffer<u32> big_page_table_physical;
 
     const size_t unique_identifier;
 
-- 
cgit v1.2.3


From f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sun, 14 Aug 2022 02:36:36 -0700
Subject: DMA & InlineToMemory Engines Rework.

---
 src/common/algorithm.h                             |   8 +
 src/video_core/buffer_cache/buffer_cache.h         |   4 +-
 src/video_core/engines/engine_upload.cpp           |  46 ++++-
 src/video_core/engines/engine_upload.h             |   6 +-
 src/video_core/engines/kepler_compute.cpp          |  13 +-
 src/video_core/engines/kepler_memory.cpp           |  13 +-
 src/video_core/engines/maxwell_3d.cpp              |   5 +-
 src/video_core/engines/maxwell_dma.cpp             |  91 ++++++---
 src/video_core/engines/maxwell_dma.h               |   6 +
 src/video_core/host1x/vic.cpp                      |   5 +-
 src/video_core/memory_manager.cpp                  |  91 +++++++++
 src/video_core/memory_manager.h                    |   6 +
 src/video_core/rasterizer_interface.h              |   2 +-
 src/video_core/renderer_opengl/gl_rasterizer.cpp   |   2 +-
 src/video_core/renderer_opengl/gl_rasterizer.h     |   2 +-
 src/video_core/renderer_vulkan/vk_compute_pass.cpp |   2 -
 src/video_core/renderer_vulkan/vk_rasterizer.cpp   |   2 +-
 src/video_core/renderer_vulkan/vk_rasterizer.h     |   2 +-
 src/video_core/texture_cache/util.cpp              |   1 -
 src/video_core/textures/decoders.cpp               | 225 +++++++--------------
 src/video_core/textures/decoders.h                 |  33 +--
 21 files changed, 323 insertions(+), 242 deletions(-)

(limited to 'src/common')

diff --git a/src/common/algorithm.h b/src/common/algorithm.h
index 9ddfd637b..055dca142 100644
--- a/src/common/algorithm.h
+++ b/src/common/algorithm.h
@@ -24,4 +24,12 @@ template <class ForwardIt, class T, class Compare = std::less<>>
     return first != last && !comp(value, *first) ? first : last;
 }
 
+template <typename T, typename Func, typename... Args> // folds args into a T via func
+T FoldRight(T initial_value, Func&& func, Args&&... args) {
+    T value{initial_value}; // accumulator; returned by value at the end
+    const auto high_func = [&value, &func]<typename U>(U x) { value = func(value, x); }; // U != T
+    (std::invoke(high_func, std::forward<Args>(args)), ...); // applied first argument to last
+    return value;
+}
+
 } // namespace Common
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index e55cac0d6..359c11d6f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -126,7 +126,7 @@ public:
 
     void DownloadMemory(VAddr cpu_addr, u64 size);
 
-    bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer);
+    bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
 
     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
 
@@ -1685,7 +1685,7 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
 
 template <class P>
 bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
-                                  std::span<u8> inlined_buffer) {
+                                  std::span<const u8> inlined_buffer) {
     const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
     if (!is_dirty) {
         return false;
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index 6ff5b1eca..a34819234 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -3,6 +3,7 @@
 
 #include <cstring>
 
+#include "common/algorithm.h"
 #include "common/assert.h"
 #include "video_core/engines/engine_upload.h"
 #include "video_core/memory_manager.h"
@@ -34,21 +35,48 @@ void State::ProcessData(const u32 data, const bool is_last_call) {
     if (!is_last_call) {
         return;
     }
+    ProcessData(inner_buffer);
+}
+
+void State::ProcessData(const u32* data, size_t num_data) {
+    const auto* const bytes = reinterpret_cast<const u8*>(data); // Same memory, viewed as bytes.
+    ProcessData(std::span<const u8>(bytes, num_data * sizeof(u32)));
+}
+
+void State::ProcessData(std::span<const u8> read_buffer) { // Writes read_buffer to regs.dest.
     const GPUVAddr address{regs.dest.Address()};
     if (is_linear) {
-        rasterizer->AccelerateInlineToMemory(address, copy_size, inner_buffer);
+        if (regs.line_count == 1) { // Single line: the rasterizer takes the whole copy.
+            rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer);
+        } else { // Several lines: destination rows advance by regs.dest.pitch.
+            for (u32 line = 0; line < regs.line_count; ++line) {
+                const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch;
+                memory_manager.WriteBlockUnsafe(
+                    dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in,
+                    regs.line_length_in);
+            }
+            memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count);
+        }
     } else {
-        UNIMPLEMENTED_IF(regs.dest.z != 0);
-        UNIMPLEMENTED_IF(regs.dest.depth != 1);
-        UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0);
-        UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0);
+        u32 width = regs.dest.width;
+        u32 x_elements = regs.line_length_in;
+        u32 x_offset = regs.dest.x;
+        const u32 bpp_shift = Common::FoldRight( // Smallest trailing-zero count, capped at 4.
+            4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
+            width, x_elements, x_offset, static_cast<u32>(address));
+        width >>= bpp_shift; // The three horizontal quantities are rescaled by the same shift.
+        x_elements >>= bpp_shift;
+        x_offset >>= bpp_shift;
+        const u32 bytes_per_pixel = 1U << bpp_shift; // Element size matching that rescale.
         const std::size_t dst_size = Tegra::Texture::CalculateSize(
-            true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0);
+            true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
+            regs.dest.BlockHeight(), regs.dest.BlockDepth());
         tmp_buffer.resize(dst_size);
         memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
-        Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
-                                      regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
-                                      tmp_buffer.data());
+        Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width,
+                                       regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
+                                       x_elements, regs.line_count, regs.dest.BlockHeight(),
+                                       regs.dest.BlockDepth(), regs.line_length_in);
         memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
     }
 }
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 94ff3314a..f08f6e36a 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <span>
 #include <vector>
 #include "common/bit_field.h"
 #include "common/common_types.h"
@@ -33,7 +34,7 @@ struct Registers {
         u32 width;
         u32 height;
         u32 depth;
-        u32 z;
+        u32 layer;
         u32 x;
         u32 y;
 
@@ -62,11 +63,14 @@ public:
 
     void ProcessExec(bool is_linear_);
     void ProcessData(u32 data, bool is_last_call);
+    void ProcessData(const u32* data, size_t num_data);
 
     /// Binds a rasterizer to this engine.
     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
 
 private:
+    void ProcessData(std::span<const u8> read_buffer);
+
     u32 write_offset = 0;
     u32 copy_size = 0;
     std::vector<u8> inner_buffer;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 5db254d94..7c50bdbe0 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -36,8 +36,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
     }
     case KEPLER_COMPUTE_REG_INDEX(data_upload): {
         upload_state.ProcessData(method_argument, is_last_call);
-        if (is_last_call) {
-        }
         break;
     }
     case KEPLER_COMPUTE_REG_INDEX(launch):
@@ -50,8 +48,15 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
 
 void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
                                     u32 methods_pending) {
-    for (std::size_t i = 0; i < amount; i++) {
-        CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+    switch (method) {
+    case KEPLER_COMPUTE_REG_INDEX(data_upload): // The whole batch goes to upload_state at once.
+        upload_state.ProcessData(base_start, static_cast<size_t>(amount));
+        return; // NOTE(review): methods_pending is not consulted on this path - confirm.
+    default: // Every other method is replayed one argument at a time.
+        for (std::size_t i = 0; i < amount; i++) {
+            CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+        }
+        break;
     }
 }
 
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index e2b029542..a3fbab1e5 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -33,8 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
     }
     case KEPLERMEMORY_REG_INDEX(data): {
         upload_state.ProcessData(method_argument, is_last_call);
-        if (is_last_call) {
-        }
         break;
     }
     }
@@ -42,8 +40,15 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
 
 void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
                                    u32 methods_pending) {
-    for (std::size_t i = 0; i < amount; i++) {
-        CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+    switch (method) {
+    case KEPLERMEMORY_REG_INDEX(data): // The whole batch goes to upload_state at once.
+        upload_state.ProcessData(base_start, static_cast<size_t>(amount));
+        return; // NOTE(review): methods_pending is not consulted on this path - confirm.
+    default: // Every other method is replayed one argument at a time.
+        for (std::size_t i = 0; i < amount; i++) {
+            CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+        }
+        break;
     }
 }
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index add1ccebe..632052c53 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -239,8 +239,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
         return upload_state.ProcessExec(regs.exec_upload.linear != 0);
     case MAXWELL3D_REG_INDEX(data_upload):
         upload_state.ProcessData(argument, is_last_call);
-        if (is_last_call) {
-        }
         return;
     case MAXWELL3D_REG_INDEX(fragment_barrier):
         return rasterizer->FragmentBarrier();
@@ -316,6 +314,9 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
         ProcessCBMultiData(base_start, amount);
         break;
+    case MAXWELL3D_REG_INDEX(data_upload):
+        upload_state.ProcessData(base_start, static_cast<size_t>(amount));
+        return;
     default:
         for (std::size_t i = 0; i < amount; i++) {
             CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 0efe58282..a12a95ce2 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include "common/algorithm.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
@@ -54,8 +55,6 @@ void MaxwellDMA::Launch() {
     const LaunchDMA& launch = regs.launch_dma;
     ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
     ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
-    ASSERT(regs.dst_params.origin.x == 0);
-    ASSERT(regs.dst_params.origin.y == 0);
 
     const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
     const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
@@ -121,12 +120,13 @@ void MaxwellDMA::CopyPitchToPitch() {
 
 void MaxwellDMA::CopyBlockLinearToPitch() {
     UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
-    UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
     UNIMPLEMENTED_IF(regs.src_params.layer != 0);
 
+    const bool is_remapping = regs.launch_dma.remap_enable != 0;
+
     // Optimized path for micro copies.
     const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
-    if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
+    if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
         regs.src_params.height > GOB_SIZE_Y) {
         FastCopyBlockLinearToPitch();
         return;
@@ -134,10 +134,27 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
 
     // Deswizzle the input and copy it over.
     UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
-    const u32 bytes_per_pixel =
-        regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1;
     const Parameters& src_params = regs.src_params;
-    const u32 width = src_params.width;
+
+    const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
+    const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
+
+    const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
+
+    u32 width = src_params.width;
+    u32 x_elements = regs.line_length_in;
+    u32 x_offset = src_params.origin.x;
+    u32 bpp_shift = 0U;
+    if (!is_remapping) {
+        bpp_shift = Common::FoldRight(
+            4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
+            width, x_elements, x_offset, static_cast<u32>(regs.offset_in));
+        width >>= bpp_shift;
+        x_elements >>= bpp_shift;
+        x_offset >>= bpp_shift;
+    }
+
+    const u32 bytes_per_pixel = base_bpp << bpp_shift;
     const u32 height = src_params.height;
     const u32 depth = src_params.depth;
     const u32 block_height = src_params.block_size.height;
@@ -155,30 +172,46 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
     memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
     memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
 
-    UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel,
-                     block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(),
-                     read_buffer.data());
+    UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
+                     src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
+                     regs.pitch_out);
 
     memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
 }
 
 void MaxwellDMA::CopyPitchToBlockLinear() {
     UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
+    UNIMPLEMENTED_IF(regs.dst_params.layer != 0);
     UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
 
+    const bool is_remapping = regs.launch_dma.remap_enable != 0;
+    const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
+    const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
+
     const auto& dst_params = regs.dst_params;
-    const u32 bytes_per_pixel =
-        regs.launch_dma.remap_enable ? regs.pitch_in / regs.line_length_in : 1;
-    const u32 width = dst_params.width;
+
+    const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
+
+    u32 width = dst_params.width;
+    u32 x_elements = regs.line_length_in;
+    u32 x_offset = dst_params.origin.x;
+    u32 bpp_shift = 0U;
+    if (!is_remapping) {
+        bpp_shift = Common::FoldRight(
+            4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
+            width, x_elements, x_offset, static_cast<u32>(regs.offset_out));
+        width >>= bpp_shift;
+        x_elements >>= bpp_shift;
+        x_offset >>= bpp_shift;
+    }
+
+    const u32 bytes_per_pixel = base_bpp << bpp_shift;
     const u32 height = dst_params.height;
     const u32 depth = dst_params.depth;
     const u32 block_height = dst_params.block_size.height;
     const u32 block_depth = dst_params.block_size.depth;
     const size_t dst_size =
         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
-    const size_t dst_layer_size =
-        CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
-
     const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
 
     if (read_buffer.size() < src_size) {
@@ -188,32 +221,23 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
         write_buffer.resize(dst_size);
     }
 
+    memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
     if (Settings::IsGPULevelExtreme()) {
-        memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
         memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
     } else {
-        memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
         memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
     }
 
     // If the input is linear and the output is tiled, swizzle the input and copy it over.
-    if (regs.dst_params.block_size.depth > 0) {
-        ASSERT(dst_params.layer == 0);
-        SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height,
-                            bytes_per_pixel, block_height, block_depth, dst_params.origin.x,
-                            dst_params.origin.y, write_buffer.data(), read_buffer.data());
-    } else {
-        SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel,
-                       write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(),
-                       block_height, dst_params.origin.x, dst_params.origin.y);
-    }
+    SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
+                   dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
+                   regs.pitch_in);
 
     memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
 }
 
 void MaxwellDMA::FastCopyBlockLinearToPitch() {
-    const u32 bytes_per_pixel =
-        regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1;
+    const u32 bytes_per_pixel = 1U;
     const size_t src_size = GOB_SIZE;
     const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
     u32 pos_x = regs.src_params.origin.x;
@@ -239,9 +263,10 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
         memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
     }
 
-    UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width,
-                     bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y,
-                     write_buffer.data(), read_buffer.data());
+    UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width,
+                     regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count,
+                     regs.src_params.block_size.height, regs.src_params.block_size.depth,
+                     regs.pitch_out);
 
     memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
 }
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 074bac92c..9c5d567a6 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -189,10 +189,16 @@ public:
             BitField<4, 3, Swizzle> dst_y;
             BitField<8, 3, Swizzle> dst_z;
             BitField<12, 3, Swizzle> dst_w;
+            BitField<0, 16, u32> dst_components_raw; // Must reach dst_w (bits 12-14).
             BitField<16, 2, u32> component_size_minus_one;
             BitField<20, 2, u32> num_src_components_minus_one;
             BitField<24, 2, u32> num_dst_components_minus_one;
         };
+
+        Swizzle GetComponent(size_t i) const { // i-th destination selector.
+            const u32 raw = dst_components_raw; // 3-bit selectors on a 4-bit stride (4, 8, 12).
+            return static_cast<Swizzle>((raw >> (i * 4)) & 0x7);
+        }
     };
     static_assert(sizeof(RemapConst) == 12);
 
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp
index 5d8039841..b9ac41529 100644
--- a/src/video_core/host1x/vic.cpp
+++ b/src/video_core/host1x/vic.cpp
@@ -156,8 +156,9 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
         const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
         const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
         luma_buffer.resize(size);
-        Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
-                                converted_frame_buf_addr, block_height, 0, 0);
+        std::span<const u8> frame_buff(converted_frame_buf_addr, 4 * width * height);
+        Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1,
+                                0, 0, width, height, block_height, 0, width * 4);
 
         host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
     } else {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 4e52ce0fd..4a692448e 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -462,6 +462,97 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
     MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, flush_short_pages);
 }
 
+bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const {
+    bool result = false; // ORs together every MustFlushRegion answer seen during the walk.
+    auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
+                          [[maybe_unused]] std::size_t offset,
+                          [[maybe_unused]] std::size_t copy_amount) { return false; };
+
+    auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount);
+        return result; // NOTE(review): presumably a true return ends the walk early - confirm.
+    };
+    auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount);
+        return result;
+    };
+    auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
+                                 std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset; // GPU address of this chunk.
+        MemoryOperation<false>(base, copy_amount, mapped_normal, do_nothing, do_nothing);
+        return result;
+    };
+    MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, check_short_pages);
+    return result; // True when any queried CPU range reported MustFlushRegion.
+}
+
+size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
+    std::optional<VAddr> old_page_addr{}; // CPU address right after the last accepted chunk.
+    size_t range_so_far = 0;              // Bytes accepted so far; this is the return value.
+    bool result{false};                   // Stop flag handed back by check_short_pages.
+    auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
+                    std::size_t copy_amount) {
+        result = true; // Passed as the reserved/unmapped callback: always asks to stop.
+        return true;
+    };
+    auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        if (old_page_addr && *old_page_addr != cpu_addr_base) { // Not adjacent to the last one.
+            result = true;
+            return true;
+        }
+        range_so_far += copy_amount;
+        old_page_addr = {cpu_addr_base + copy_amount};
+        return false;
+    };
+    auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        if (old_page_addr && *old_page_addr != cpu_addr_base) { // Not adjacent to the last one.
+            return true; // Unlike short_check, result is left untouched here.
+        }
+        range_so_far += copy_amount;
+        old_page_addr = {cpu_addr_base + copy_amount};
+        return false;
+    };
+    auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
+                                 std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset; // GPU address of this chunk.
+        MemoryOperation<false>(base, copy_amount, short_check, fail, fail);
+        return result;
+    };
+    MemoryOperation<true>(gpu_addr, size, big_check, fail, check_short_pages);
+    return range_so_far;
+}
+
+void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const {
+    auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
+                          [[maybe_unused]] std::size_t offset,
+                          [[maybe_unused]] std::size_t copy_amount) {};
+
+    auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); // Entry taken from page_table.
+    };
+    auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); // Entry from big_page_table_cpu.
+    };
+    auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset,
+                                      std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset; // GPU address of this chunk.
+        MemoryOperation<false>(base, copy_amount, mapped_normal, do_nothing, do_nothing);
+    };
+    MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, invalidate_short_pages);
+}
+
 void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) {
     std::vector<u8> tmp_buffer(size);
     ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 8f8877a92..9c08edc20 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -104,6 +104,12 @@ public:
 
     void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
 
+    void InvalidateRegion(GPUVAddr gpu_addr, size_t size) const;
+
+    bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const;
+
+    size_t MaxContinousRange(GPUVAddr gpu_addr, size_t size) const;
+
 private:
     template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
     inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index cb07f3d38..d2d40884c 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -129,7 +129,7 @@ public:
     [[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0;
 
     virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
-                                          std::span<u8> memory) = 0;
+                                          std::span<const u8> memory) = 0;
 
     /// Attempt to use a faster method to display the framebuffer to screen
     [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 02bb17715..c2d80605d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -476,7 +476,7 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA()
 }
 
 void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
-                                                std::span<u8> memory) {
+                                                std::span<const u8> memory) {
     auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
     if (!cpu_addr) [[unlikely]] {
         gpu_memory->WriteBlock(address, memory.data(), copy_size);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index fe0ba979a..45131b785 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -99,7 +99,7 @@ public:
                                const Tegra::Engines::Fermi2D::Config& copy_config) override;
     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
-                                  std::span<u8> memory) override;
+                                  std::span<const u8> memory) override;
     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                            u32 pixel_stride) override;
     void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index f17a5ccd6..241d7573e 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -26,8 +26,6 @@
 
 namespace Vulkan {
 
-using Tegra::Texture::SWIZZLE_TABLE;
-
 namespace {
 
 constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index a35e41199..acfd5da7d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -548,7 +548,7 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA()
 }
 
 void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
-                                                std::span<u8> memory) {
+                                                std::span<const u8> memory) {
     auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
     if (!cpu_addr) [[unlikely]] {
         gpu_memory->WriteBlock(address, memory.data(), copy_size);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index fb9e83e8f..4cde3c983 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -95,7 +95,7 @@ public:
                                const Tegra::Engines::Fermi2D::Config& copy_config) override;
     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
-                                  std::span<u8> memory) override;
+                                  std::span<const u8> memory) override;
     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                            u32 pixel_stride) override;
     void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index bea1c27d0..1223df5a0 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -517,7 +517,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
     const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;
 
     UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
-
     UNIMPLEMENTED_IF(copy.image_offset.x != 0);
     UNIMPLEMENTED_IF(copy.image_offset.y != 0);
     UNIMPLEMENTED_IF(copy.image_offset.z != 0);
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 913f8ebcb..fcc636e0b 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -89,6 +89,69 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
     }
 }
 
+template <bool TO_LINEAR, u32 BYTES_PER_PIXEL> // Copies a sub-rectangle between the two layouts.
+void SwizzleSubrectImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height,
+                        u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 num_lines,
+                        u32 block_height, u32 block_depth, u32 pitch_linear) {
+    // The origin of the transformation can be configured here, leave it as zero as the current API
+    // doesn't expose it.
+    static constexpr u32 origin_z = 0;
+
+    // Row pitch, in bytes, used for the unswizzled offsets; supplied by the caller as
+    // pitch_linear. The swizzled stride is derived separately from 'width * BYTES_PER_PIXEL'.
+    const u32 pitch = pitch_linear;
+    const u32 stride = Common::AlignUpLog2(width * BYTES_PER_PIXEL, GOB_SIZE_X_SHIFT);
+
+    const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
+    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
+    const u32 slice_size =
+        Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
+
+    const u32 block_height_mask = (1U << block_height) - 1;
+    const u32 block_depth_mask = (1U << block_depth) - 1;
+    const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
+
+    u32 unprocessed_lines = num_lines; // Lines still to copy, counted across all slices.
+    u32 extent_y = std::min(num_lines, height - origin_y); // Most lines copied in one slice.
+
+    for (u32 slice = 0; slice < depth; ++slice) {
+        const u32 z = slice + origin_z;
+        const u32 offset_z = (z >> block_depth) * slice_size +
+                             ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
+        const u32 lines_in_y = std::min(unprocessed_lines, extent_y);
+        for (u32 line = 0; line < lines_in_y; ++line) {
+            const u32 y = line + origin_y;
+            const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(y);
+
+            const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
+            const u32 offset_y = (block_y >> block_height) * block_size +
+                                 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
+
+            u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
+            for (u32 column = 0; column < extent_x;
+                 ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
+                const u32 x = (column + origin_x) * BYTES_PER_PIXEL;
+                const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
+
+                const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
+                const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y);
+
+                const u32 unswizzled_offset =
+                    slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
+                // TO_LINEAR indexes output by the swizzled offset and input by the unswizzled one.
+                u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
+                const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
+
+                std::memcpy(dst, src, BYTES_PER_PIXEL);
+            }
+        }
+        unprocessed_lines -= lines_in_y;
+        if (unprocessed_lines == 0) { // Every requested line has been copied.
+            return;
+        }
+    }
+}
+
 template <bool TO_LINEAR>
 void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
              u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
@@ -111,97 +174,6 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
     }
 }
 
-template <u32 BYTES_PER_PIXEL>
-void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit,
-                    u32 offset_x, u32 offset_y) {
-    const u32 block_height = 1U << block_height_bit;
-    const u32 image_width_in_gobs =
-        (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X;
-    for (u32 line = 0; line < subrect_height; ++line) {
-        const u32 dst_y = line + offset_y;
-        const u32 gob_address_y =
-            (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
-            ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
-
-        const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(dst_y);
-        u32 swizzled_x = pdep<SWIZZLE_X_BITS>(offset_x * BYTES_PER_PIXEL);
-        for (u32 x = 0; x < subrect_width;
-             ++x, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
-            const u32 dst_x = x + offset_x;
-            const u32 gob_address =
-                gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height;
-            const u32 swizzled_offset = gob_address + (swizzled_x | swizzled_y);
-            const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL;
-
-            const u8* const source_line = unswizzled_data + unswizzled_offset;
-            u8* const dest_addr = swizzled_data + swizzled_offset;
-            std::memcpy(dest_addr, source_line, BYTES_PER_PIXEL);
-        }
-    }
-}
-
-template <u32 BYTES_PER_PIXEL>
-void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height,
-                      u32 origin_x, u32 origin_y, u8* output, const u8* input) {
-    const u32 stride = width * BYTES_PER_PIXEL;
-    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
-    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
-
-    const u32 block_height_mask = (1U << block_height) - 1;
-    const u32 x_shift = GOB_SIZE_SHIFT + block_height;
-
-    for (u32 line = 0; line < line_count; ++line) {
-        const u32 src_y = line + origin_y;
-        const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(src_y);
-
-        const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
-        const u32 src_offset_y = (block_y >> block_height) * block_size +
-                                 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
-
-        u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
-        for (u32 column = 0; column < line_length_in;
-             ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
-            const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL;
-            const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
-
-            const u32 swizzled_offset = src_offset_y + src_offset_x + (swizzled_x | swizzled_y);
-            const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL;
-
-            std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL);
-        }
-    }
-}
-
-template <u32 BYTES_PER_PIXEL>
-void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
-                         u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output,
-                         const u8* input) {
-    UNIMPLEMENTED_IF(origin_x > 0);
-    UNIMPLEMENTED_IF(origin_y > 0);
-
-    const u32 stride = width * BYTES_PER_PIXEL;
-    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
-    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
-
-    const u32 block_height_mask = (1U << block_height) - 1;
-    const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
-
-    for (u32 line = 0; line < line_count; ++line) {
-        const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(line);
-        const u32 block_y = line / GOB_SIZE_Y;
-        const u32 dst_offset_y =
-            (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
-
-        u32 swizzled_x = 0;
-        for (u32 x = 0; x < line_length_in; ++x, incrpdep<SWIZZLE_X_BITS, 1>(swizzled_x)) {
-            const u32 dst_offset =
-                ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + (swizzled_x | swizzled_y);
-            const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch;
-            std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL);
-        }
-    }
-}
 } // Anonymous namespace
 
 void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
@@ -218,15 +190,15 @@ void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_p
                   stride_alignment);
 }
 
-void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
-                    u32 block_height_bit, u32 offset_x, u32 offset_y) {
+void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+                    u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y,
+                    u32 block_height, u32 block_depth, u32 pitch_linear) {
     switch (bytes_per_pixel) {
 #define BPP_CASE(x)                                                                                \
     case x:                                                                                        \
-        return SwizzleSubrect<x>(subrect_width, subrect_height, source_pitch, swizzled_width,      \
-                                 swizzled_data, unswizzled_data, block_height_bit, offset_x,       \
-                                 offset_y);
+        return SwizzleSubrectImpl<true, x>(output, input, width, height, depth, origin_x,          \
+                                           origin_y, extent_x, extent_y, block_height,             \
+                                           block_depth, pitch_linear);
         BPP_CASE(1)
         BPP_CASE(2)
         BPP_CASE(3)
@@ -241,13 +213,15 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
     }
 }
 
-void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
-                      u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
+void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
+                      u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x,
+                      u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear) {
     switch (bytes_per_pixel) {
 #define BPP_CASE(x)                                                                                \
     case x:                                                                                        \
-        return UnswizzleSubrect<x>(line_length_in, line_count, pitch, width, block_height,         \
-                                   origin_x, origin_y, output, input);
+        return SwizzleSubrectImpl<false, x>(output, input, width, height, depth, origin_x,         \
+                                            origin_y, extent_x, extent_y, block_height,            \
+                                            block_depth, pitch_linear);
         BPP_CASE(1)
         BPP_CASE(2)
         BPP_CASE(3)
@@ -262,55 +236,6 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
     }
 }
 
-void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
-                         u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
-                         u32 origin_y, u8* output, const u8* input) {
-    switch (bytes_per_pixel) {
-#define BPP_CASE(x)                                                                                \
-    case x:                                                                                        \
-        return SwizzleSliceToVoxel<x>(line_length_in, line_count, pitch, width, height,            \
-                                      block_height, block_depth, origin_x, origin_y, output,       \
-                                      input);
-        BPP_CASE(1)
-        BPP_CASE(2)
-        BPP_CASE(3)
-        BPP_CASE(4)
-        BPP_CASE(6)
-        BPP_CASE(8)
-        BPP_CASE(12)
-        BPP_CASE(16)
-#undef BPP_CASE
-    default:
-        ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
-    }
-}
-
-void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
-                   const u32 block_height_bit, const std::size_t copy_size, const u8* source_data,
-                   u8* swizzle_data) {
-    const u32 block_height = 1U << block_height_bit;
-    const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X};
-    std::size_t count = 0;
-    for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
-        const std::size_t gob_address_y =
-            (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
-            ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
-        const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(static_cast<u32>(y));
-        u32 swizzled_x = pdep<SWIZZLE_X_BITS>(dst_x);
-        for (std::size_t x = dst_x; x < width && count < copy_size;
-             ++x, incrpdep<SWIZZLE_X_BITS, 1>(swizzled_x)) {
-            const std::size_t gob_address =
-                gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
-            const std::size_t swizzled_offset = gob_address + (swizzled_x | swizzled_y);
-            const u8* source_line = source_data + count;
-            u8* dest_addr = swizzle_data + swizzled_offset;
-            count++;
-
-            *dest_addr = *source_line;
-        }
-    }
-}
-
 std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                           u32 block_height, u32 block_depth) {
     if (tiled) {
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 31a11708f..e70407692 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -40,7 +40,6 @@ constexpr SwizzleTable MakeSwizzleTable() {
     }
     return table;
 }
-constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTable();
 
 /// Unswizzles a block linear texture into linear memory.
 void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
@@ -57,34 +56,14 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
                           u32 block_height, u32 block_depth);
 
 /// Copies an untiled subrectangle into a tiled surface.
-void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
-                    u32 block_height_bit, u32 offset_x, u32 offset_y);
+void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+                    u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y,
+                    u32 block_height, u32 block_depth, u32 pitch_linear);
 
 /// Copies a tiled subrectangle into a linear surface.
-void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
-                      u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input);
-
-/// @brief Swizzles a 2D array of pixels into a 3D texture
-/// @param line_length_in  Number of pixels per line
-/// @param line_count      Number of lines
-/// @param pitch           Number of bytes per line
-/// @param width           Width of the swizzled texture
-/// @param height          Height of the swizzled texture
-/// @param bytes_per_pixel Number of bytes used per pixel
-/// @param block_height    Block height shift
-/// @param block_depth     Block depth shift
-/// @param origin_x        Column offset in pixels of the swizzled texture
-/// @param origin_y        Row offset in pixels of the swizzled texture
-/// @param output          Pointer to the pixels of the swizzled texture
-/// @param input           Pointer to the 2D array of pixels used as input
-/// @pre input and output points to an array large enough to hold the number of bytes used
-void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
-                         u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
-                         u32 origin_y, u8* output, const u8* input);
-
-void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
-                   std::size_t copy_size, const u8* source_data, u8* swizzle_data);
+void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
+                      u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x,
+                      u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear);
 
 /// Obtains the offset of the gob for positions 'dst_x' & 'dst_y'
 u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
-- 
cgit v1.2.3


From afab6c143cb486c7d14f1509cd04049ad08d3a65 Mon Sep 17 00:00:00 2001
From: Liam White <yteslice@airmail.cc>
Date: Wed, 13 Apr 2022 21:02:55 +0200
Subject: General: Fix compilation for GCC

---
 src/common/address_space.h                         | 10 +++---
 src/common/algorithm.h                             |  2 +-
 src/common/bit_field.h                             | 13 +++-----
 src/common/multi_level_page_table.cpp              |  4 +--
 src/common/multi_level_page_table.inc              |  2 +-
 src/core/hle/service/nvdrv/core/nvmap.cpp          | 38 ++++++++++++++--------
 src/core/hle/service/nvdrv/core/nvmap.h            |  1 +
 .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp    |  3 ++
 src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp |  8 ++---
 .../hle/service/nvdrv/devices/nvhost_nvdec.cpp     |  4 +--
 src/core/hle/service/nvdrv/devices/nvhost_vic.cpp  |  4 +--
 src/core/hle/service/nvdrv/devices/nvmap.cpp       |  2 +-
 src/core/hle/service/nvdrv/nvdrv.h                 |  1 +
 src/core/hle/service/vi/vi.cpp                     |  1 +
 src/shader_recompiler/ir_opt/texture_pass.cpp      |  2 +-
 src/video_core/buffer_cache/buffer_cache.h         |  3 +-
 16 files changed, 56 insertions(+), 42 deletions(-)

(limited to 'src/common')

diff --git a/src/common/address_space.h b/src/common/address_space.h
index fd2f32b7d..8e13935af 100644
--- a/src/common/address_space.h
+++ b/src/common/address_space.h
@@ -22,7 +22,8 @@ struct EmptyStruct {};
  */
 template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,
           bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo = EmptyStruct>
-requires AddressSpaceValid<VaType, AddressSpaceBits> class FlatAddressSpaceMap {
+requires AddressSpaceValid<VaType, AddressSpaceBits>
+class FlatAddressSpaceMap {
 private:
     std::function<void(VaType, VaType)>
         unmapCallback{}; //!< Callback called when the mappings in an region have changed
@@ -40,8 +41,8 @@ protected:
 
         Block() = default;
 
-        Block(VaType virt, PaType phys, ExtraBlockInfo extraInfo)
-            : virt(virt), phys(phys), extraInfo(extraInfo) {}
+        Block(VaType virt_, PaType phys_, ExtraBlockInfo extraInfo_)
+            : virt(virt_), phys(phys_), extraInfo(extraInfo_) {}
 
         constexpr bool Valid() {
             return virt != UnmappedVa;
@@ -102,7 +103,8 @@ public:
  * initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block
  */
 template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>
-requires AddressSpaceValid<VaType, AddressSpaceBits> class FlatAllocator
+requires AddressSpaceValid<VaType, AddressSpaceBits>
+class FlatAllocator
     : public FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits> {
 private:
     using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>;
diff --git a/src/common/algorithm.h b/src/common/algorithm.h
index 055dca142..c27c9241d 100644
--- a/src/common/algorithm.h
+++ b/src/common/algorithm.h
@@ -27,7 +27,7 @@ template <class ForwardIt, class T, class Compare = std::less<>>
 template <typename T, typename Func, typename... Args>
 T FoldRight(T initial_value, Func&& func, Args&&... args) {
     T value{initial_value};
-    const auto high_func = [&value, &func]<typename T>(T x) { value = func(value, x); };
+    const auto high_func = [&value, &func]<typename U>(U x) { value = func(value, x); };
     (std::invoke(high_func, std::forward<Args>(args)), ...);
     return value;
 }
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 368b7b98c..7e1df62b1 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -127,14 +127,11 @@ public:
         }
     }
 
-    BitField(T val) {
-        Assign(val);
-    }
-
-    BitField& operator=(T val) {
-        Assign(val);
-        return *this;
-    }
+    // This constructor and assignment operator might be considered ambiguous:
+    // Would they initialize the storage or just the bitfield?
+    // Hence, delete them. Use the Assign method to set bitfield values!
+    BitField(T val) = delete;
+    BitField& operator=(T val) = delete;
 
     constexpr BitField() noexcept = default;
 
diff --git a/src/common/multi_level_page_table.cpp b/src/common/multi_level_page_table.cpp
index aed04d0b5..3a7a75aa7 100644
--- a/src/common/multi_level_page_table.cpp
+++ b/src/common/multi_level_page_table.cpp
@@ -1,8 +1,6 @@
 #include "common/multi_level_page_table.inc"
 
 namespace Common {
-template class Common::MultiLevelPageTable<GPUVAddr>;
-template class Common::MultiLevelPageTable<VAddr>;
-template class Common::MultiLevelPageTable<PAddr>;
+template class Common::MultiLevelPageTable<u64>;
 template class Common::MultiLevelPageTable<u32>;
 } // namespace Common
diff --git a/src/common/multi_level_page_table.inc b/src/common/multi_level_page_table.inc
index 9a68cad93..4def6dba8 100644
--- a/src/common/multi_level_page_table.inc
+++ b/src/common/multi_level_page_table.inc
@@ -30,7 +30,7 @@ MultiLevelPageTable<BaseAddr>::MultiLevelPageTable(std::size_t address_space_bit
 #ifdef _WIN32
     void* base{VirtualAlloc(nullptr, alloc_size, MEM_RESERVE, PAGE_READWRITE)};
 #else
-    void* base{mmap(nullptr, alloc_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)};
+    void* base{mmap(nullptr, alloc_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)};
 
     if (base == MAP_FAILED) {
         base = nullptr;
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index 86d825af9..b02dbb9c9 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -13,7 +13,8 @@
 using Core::Memory::YUZU_PAGESIZE;
 
 namespace Service::Nvidia::NvCore {
-NvMap::Handle::Handle(u64 size, Id id) : size(size), aligned_size(size), orig_size(size), id(id) {
+NvMap::Handle::Handle(u64 size_, Id id_)
+    : size(size_), aligned_size(size), orig_size(size), id(id_) {
     flags.raw = 0;
 }
 
@@ -21,19 +22,21 @@ NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress)
     std::scoped_lock lock(mutex);
 
     // Handles cannot be allocated twice
-    if (allocated)
+    if (allocated) {
         return NvResult::AccessDenied;
+    }
 
     flags = pFlags;
     kind = pKind;
     align = pAlign < YUZU_PAGESIZE ? YUZU_PAGESIZE : pAlign;
 
     // This flag is only applicable for handles with an address passed
-    if (pAddress)
-        flags.keep_uncached_after_free = 0;
-    else
+    if (pAddress) {
+        flags.keep_uncached_after_free.Assign(0);
+    } else {
         LOG_CRITICAL(Service_NVDRV,
                      "Mapping nvmap handles without a CPU side address is unimplemented!");
+    }
 
     size = Common::AlignUp(size, YUZU_PAGESIZE);
     aligned_size = Common::AlignUp(size, align);
@@ -48,17 +51,19 @@ NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress)
 
 NvResult NvMap::Handle::Duplicate(bool internal_session) {
     // Unallocated handles cannot be duplicated as duplication requires memory accounting (in HOS)
-    if (!allocated) [[unlikely]]
+    if (!allocated) [[unlikely]] {
         return NvResult::BadValue;
+    }
 
     std::scoped_lock lock(mutex);
 
     // If we internally use FromId the duplication tracking of handles won't work accurately due to
     // us not implementing per-process handle refs.
-    if (internal_session)
+    if (internal_session) {
         internal_dupes++;
-    else
+    } else {
         dupes++;
+    }
 
     return NvResult::Success;
 }
@@ -92,8 +97,9 @@ bool NvMap::TryRemoveHandle(const Handle& handle_description) {
         std::scoped_lock lock(handles_lock);
 
         auto it{handles.find(handle_description.id)};
-        if (it != handles.end())
+        if (it != handles.end()) {
             handles.erase(it);
+        }
 
         return true;
     } else {
@@ -102,8 +108,9 @@ bool NvMap::TryRemoveHandle(const Handle& handle_description) {
 }
 
 NvResult NvMap::CreateHandle(u64 size, std::shared_ptr<NvMap::Handle>& result_out) {
-    if (!size) [[unlikely]]
+    if (!size) [[unlikely]] {
         return NvResult::BadValue;
+    }
 
     u32 id{next_handle_id.fetch_add(HandleIdIncrement, std::memory_order_relaxed)};
     auto handle_description{std::make_shared<Handle>(size, id)};
@@ -133,8 +140,9 @@ VAddr NvMap::GetHandleAddress(Handle::Id handle) {
 
 u32 NvMap::PinHandle(NvMap::Handle::Id handle) {
     auto handle_description{GetHandle(handle)};
-    if (!handle_description) [[unlikely]]
+    if (!handle_description) [[unlikely]] {
         return 0;
+    }
 
     std::scoped_lock lock(handle_description->mutex);
     if (!handle_description->pins) {
@@ -183,8 +191,9 @@ u32 NvMap::PinHandle(NvMap::Handle::Id handle) {
 
 void NvMap::UnpinHandle(Handle::Id handle) {
     auto handle_description{GetHandle(handle)};
-    if (!handle_description)
+    if (!handle_description) {
         return;
+    }
 
     std::scoped_lock lock(handle_description->mutex);
     if (--handle_description->pins < 0) {
@@ -226,12 +235,13 @@ std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool interna
 
         // Try to remove the shared ptr to the handle from the map, if nothing else is using the
         // handle then it will now be freed when `handle_description` goes out of scope
-        if (TryRemoveHandle(*handle_description))
+        if (TryRemoveHandle(*handle_description)) {
             LOG_DEBUG(Service_NVDRV, "Removed nvmap handle: {}", handle);
-        else
+        } else {
             LOG_DEBUG(Service_NVDRV,
                       "Tried to free nvmap handle: {} but didn't as it still has duplicates",
                       handle);
+        }
 
         freeInfo = {
             .address = handle_description->address,
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h
index 4f37dcf43..1082bb58d 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -5,6 +5,7 @@
 
 #pragma once
 
+#include <atomic>
 #include <list>
 #include <memory>
 #include <mutex>
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index d95a88393..d1beefba6 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -188,6 +188,7 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
 
     allocation_map[params.offset] = {
         .size = size,
+        .mappings{},
         .page_size = params.page_size,
         .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None,
         .big_pages = params.page_size != VM::YUZU_PAGESIZE,
@@ -474,11 +475,13 @@ void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) {
         VaRegion{
             .offset = vm.small_page_allocator->vaStart << VM::PAGE_SIZE_BITS,
             .page_size = VM::YUZU_PAGESIZE,
+            ._pad0_{},
             .pages = vm.small_page_allocator->vaLimit - vm.small_page_allocator->vaStart,
         },
         VaRegion{
             .offset = vm.big_page_allocator->vaStart << vm.big_page_size_bits,
             .page_size = vm.big_page_size,
+            ._pad0_{},
             .pages = vm.big_page_allocator->vaLimit - vm.big_page_allocator->vaStart,
         },
     };
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index a84e4d425..7fffb8e48 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -204,12 +204,12 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
 
     event.wait_handle =
         host1x_syncpoint_manager.RegisterHostAction(fence_id, target_value, [this, slot]() {
-            auto& event = events[slot];
-            if (event.status.exchange(EventState::Signalling, std::memory_order_acq_rel) ==
+            auto& event_ = events[slot];
+            if (event_.status.exchange(EventState::Signalling, std::memory_order_acq_rel) ==
                 EventState::Waiting) {
-                event.kevent->GetWritableEvent().Signal();
+                event_.kevent->GetWritableEvent().Signal();
             }
-            event.status.store(EventState::Signalled, std::memory_order_release);
+            event_.status.store(EventState::Signalled, std::memory_order_release);
         });
     return NvResult::Timeout;
 }
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index 5e3820085..fed537039 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -12,8 +12,8 @@ namespace Service::Nvidia::Devices {
 
 u32 nvhost_nvdec::next_id{};
 
-nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core)
-    : nvhost_nvdec_common{system_, core, NvCore::ChannelType::NvDec} {}
+nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core_)
+    : nvhost_nvdec_common{system_, core_, NvCore::ChannelType::NvDec} {}
 nvhost_nvdec::~nvhost_nvdec() = default;
 
 NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index 490e399f4..2e4ff988c 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -11,8 +11,8 @@ namespace Service::Nvidia::Devices {
 
 u32 nvhost_vic::next_id{};
 
-nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core)
-    : nvhost_nvdec_common{system_, core, NvCore::ChannelType::VIC} {}
+nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core_)
+    : nvhost_nvdec_common{system_, core_, NvCore::ChannelType::VIC} {}
 
 nvhost_vic::~nvhost_vic() = default;
 
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 992c117f1..f84fc8c37 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -269,7 +269,7 @@ NvResult nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
         params.address = freeInfo->address;
         params.size = static_cast<u32>(freeInfo->size);
         params.flags.raw = 0;
-        params.flags.map_uncached = freeInfo->was_uncached;
+        params.flags.map_uncached.Assign(freeInfo->was_uncached);
     } else {
         // This is possible when there's internel dups or other duplicates.
     }
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 31c45236e..b26254753 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -6,6 +6,7 @@
 #pragma once
 
 #include <functional>
+#include <list>
 #include <memory>
 #include <string>
 #include <unordered_map>
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index f083811ec..9c917cacf 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -58,6 +58,7 @@ static_assert(sizeof(DisplayInfo) == 0x60, "DisplayInfo has wrong size");
 class NativeWindow final {
 public:
     constexpr explicit NativeWindow(u32 id_) : id{id_} {}
+    constexpr explicit NativeWindow(const NativeWindow& other) = default;
 
 private:
     const u32 magic = 2;
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index 0726d4d21..e8be58357 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -269,7 +269,7 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
         }
         std::optional lhs{Track(op1, env)};
         if (lhs) {
-            lhs->shift_left = std::countr_zero(op2.U32());
+            lhs->shift_left = static_cast<u32>(std::countr_zero(op2.U32()));
         }
         return lhs;
         break;
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2e616cee4..8e26b3f95 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1323,7 +1323,8 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
         return;
     }
     if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
-        address_size = gpu_memory->MaxContinousRange(gpu_addr_begin, address_size);
+        address_size =
+            static_cast<u32>(gpu_memory->MaxContinousRange(gpu_addr_begin, address_size));
     }
     const u32 size = address_size; // TODO: Analyze stride and number of vertices
     vertex_buffers[index] = Binding{
-- 
cgit v1.2.3


From 1a9b71b1c6a5b6fb2a41fc485a986e9c505b2856 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 16 Jun 2022 02:00:29 +0200
Subject: Common: Fix variable shadowing.

---
 src/common/address_space.inc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/common')

diff --git a/src/common/address_space.inc b/src/common/address_space.inc
index 907c55d88..e1241d099 100644
--- a/src/common/address_space.inc
+++ b/src/common/address_space.inc
@@ -30,9 +30,9 @@
         FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
 
 namespace Common {
-MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType vaLimit,
-                                        std::function<void(VaType, VaType)> unmapCallback)
-    : unmapCallback(std::move(unmapCallback)), vaLimit(vaLimit) {
+MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType vaLimit_,
+                                        std::function<void(VaType, VaType)> unmapCallback_)
+    : unmapCallback(std::move(unmapCallback_)), vaLimit(vaLimit_) {
     if (vaLimit > VaMaximum)
         UNREACHABLE_MSG("Invalid VA limit!");
 }
@@ -261,8 +261,8 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
         unmapCallback(virt, size);
 }
 
-ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart, VaType vaLimit)
-    : Base(vaLimit), currentLinearAllocEnd(vaStart), vaStart(vaStart) {}
+ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart_, VaType vaLimit)
+    : Base(vaLimit), currentLinearAllocEnd(vaStart_), vaStart(vaStart_) {}
 
 ALLOC_MEMBER(VaType)::Allocate(VaType size) {
     std::scoped_lock lock(this->blockMutex);
-- 
cgit v1.2.3


From fe24c65153349d3a759a2eef02ec703851a96847 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 16 Jun 2022 02:12:21 +0200
Subject: General: Fix clang format.

---
 src/common/address_space.inc                       |  4 ++--
 src/video_core/control/channel_state_cache.h       |  1 +
 src/video_core/host1x/codecs/vp9.cpp               |  5 +++--
 src/video_core/host1x/syncpoint_manager.h          |  4 ++--
 src/video_core/host1x/vic.cpp                      |  4 ++--
 src/video_core/memory_manager.cpp                  |  1 -
 src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13 ++++---------
 7 files changed, 14 insertions(+), 18 deletions(-)

(limited to 'src/common')

diff --git a/src/common/address_space.inc b/src/common/address_space.inc
index e1241d099..7cfbb150b 100644
--- a/src/common/address_space.inc
+++ b/src/common/address_space.inc
@@ -261,8 +261,8 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
         unmapCallback(virt, size);
 }
 
-ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart_, VaType vaLimit)
-    : Base(vaLimit), currentLinearAllocEnd(vaStart_), vaStart(vaStart_) {}
+ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart_, VaType vaLimit_)
+    : Base(vaLimit_), currentLinearAllocEnd(vaStart_), vaStart(vaStart_) {}
 
 ALLOC_MEMBER(VaType)::Allocate(VaType size) {
     std::scoped_lock lock(this->blockMutex);
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
index dbf833de7..102947adb 100644
--- a/src/video_core/control/channel_state_cache.h
+++ b/src/video_core/control/channel_state_cache.h
@@ -9,6 +9,7 @@
 #include <mutex>
 #include <optional>
 #include <unordered_map>
+#include <vector>
 
 #include "common/common_types.h"
 
diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp
index 667aadc6a..cf40c9012 100644
--- a/src/video_core/host1x/codecs/vp9.cpp
+++ b/src/video_core/host1x/codecs/vp9.cpp
@@ -382,8 +382,9 @@ Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters
         // gpu.SyncGuestHost(); epic, why?
         current_frame.info = GetVp9PictureInfo(state);
         current_frame.bit_stream.resize(current_frame.info.bitstream_size);
-        host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
-                                      current_frame.info.bitstream_size);
+        host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset,
+                                         current_frame.bit_stream.data(),
+                                         current_frame.info.bitstream_size);
     }
     if (!next_frame.bit_stream.empty()) {
         Vp9FrameContainer temp{
diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h
index 0ecc040ab..440b1508a 100644
--- a/src/video_core/host1x/syncpoint_manager.h
+++ b/src/video_core/host1x/syncpoint_manager.h
@@ -49,9 +49,9 @@ public:
                               expected_value, func);
     }
 
-    void DeregisterGuestAction(u32 syncpoint_id,ActionHandle& handle);
+    void DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle);
 
-    void DeregisterHostAction(u32 syncpoint_id,ActionHandle& handle);
+    void DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle);
 
     void IncrementGuest(u32 syncpoint_id);
 
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp
index b9ac41529..ac0b7d20e 100644
--- a/src/video_core/host1x/vic.cpp
+++ b/src/video_core/host1x/vic.cpp
@@ -157,8 +157,8 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
         const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
         luma_buffer.resize(size);
         std::span<const u8> frame_buff(converted_frame_buf_addr, 4 * width * height);
-        Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1,
-                                0, 0, width, height, block_height, 0, width * 4);
+        Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height,
+                                block_height, 0, width * 4);
 
         host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
     } else {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 3cb2d9224..cca401c74 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -619,7 +619,6 @@ bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const
 }
 
 bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
-    std::optional<VAddr> old_page_addr{};
     bool result{true};
     auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
                     [[maybe_unused]] std::size_t copy_amount) {
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 51d9e8f68..d8131232a 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -95,19 +95,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
                                Core::Frontend::EmuWindow& emu_window,
                                Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
-    : RendererBase(emu_window, std::move(context_)),
-      telemetry_session(telemetry_session_),
-      cpu_memory(cpu_memory_),
-      gpu(gpu_),
-      library(OpenLibrary()),
+    : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
+      cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
       instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
                               true, Settings::values.renderer_debug.GetValue())),
       debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
       surface(CreateSurface(instance, render_window)),
-      device(CreateDevice(instance, dld, *surface)),
-      memory_allocator(device, false),
-      state_tracker(),
-      scheduler(device, state_tracker),
+      device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
+      state_tracker(), scheduler(device, state_tracker),
       swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
                 render_window.GetFramebufferLayout().height, false),
       blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
-- 
cgit v1.2.3


From fedd983f96bcbcc0c39f651db1cca0503d582fd9 Mon Sep 17 00:00:00 2001
From: Morph <39850852+Morph1984@users.noreply.github.com>
Date: Wed, 29 Jun 2022 19:27:49 -0400
Subject: general: Format licenses as per SPDX guidelines

---
 src/common/address_space.cpp                          | 5 ++---
 src/common/address_space.h                            | 5 ++---
 src/common/address_space.inc                          | 4 ++--
 src/common/multi_level_page_table.cpp                 | 3 +++
 src/common/multi_level_page_table.h                   | 5 ++---
 src/common/multi_level_page_table.inc                 | 5 ++---
 src/core/hle/service/nvdrv/core/container.cpp         | 7 +++----
 src/core/hle/service/nvdrv/core/container.h           | 7 +++----
 src/core/hle/service/nvdrv/core/nvmap.cpp             | 7 +++----
 src/core/hle/service/nvdrv/core/nvmap.h               | 7 +++----
 src/core/hle/service/nvdrv/core/syncpoint_manager.cpp | 7 +++----
 src/core/hle/service/nvdrv/core/syncpoint_manager.h   | 7 +++----
 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp  | 7 +++----
 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h    | 7 +++----
 src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp    | 7 +++----
 src/core/hle/service/nvdrv/devices/nvhost_ctrl.h      | 7 +++----
 src/core/hle/service/nvdrv/nvdata.h                   | 7 +++----
 src/core/hle/service/nvdrv/nvdrv.cpp                  | 7 +++----
 src/core/hle/service/nvdrv/nvdrv.h                    | 7 +++----
 src/core/hle/service/nvdrv/nvdrv_interface.cpp        | 7 +++----
 src/video_core/control/channel_state.cpp              | 5 ++---
 src/video_core/control/channel_state.h                | 5 ++---
 src/video_core/control/channel_state_cache.cpp        | 3 +++
 src/video_core/control/channel_state_cache.h          | 5 ++---
 src/video_core/control/channel_state_cache.inc        | 2 ++
 src/video_core/control/scheduler.cpp                  | 5 ++---
 src/video_core/control/scheduler.h                    | 5 ++---
 src/video_core/engines/puller.cpp                     | 5 ++---
 src/video_core/engines/puller.h                       | 5 ++---
 src/video_core/host1x/control.cpp                     | 5 ++---
 src/video_core/host1x/control.h                       | 7 +++----
 src/video_core/host1x/host1x.cpp                      | 5 ++---
 src/video_core/host1x/host1x.h                        | 5 ++---
 src/video_core/host1x/syncpoint_manager.cpp           | 5 ++---
 src/video_core/host1x/syncpoint_manager.h             | 5 ++---
 src/video_core/texture_cache/texture_cache.cpp        | 5 ++---
 src/video_core/texture_cache/texture_cache.h          | 6 ++----
 src/video_core/texture_cache/texture_cache_base.h     | 6 ++----
 38 files changed, 93 insertions(+), 121 deletions(-)

(limited to 'src/common')

diff --git a/src/common/address_space.cpp b/src/common/address_space.cpp
index 6db85be87..866e78dbe 100644
--- a/src/common/address_space.cpp
+++ b/src/common/address_space.cpp
@@ -1,6 +1,5 @@
-// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "common/address_space.inc"
 
diff --git a/src/common/address_space.h b/src/common/address_space.h
index 8e13935af..5b3832f07 100644
--- a/src/common/address_space.h
+++ b/src/common/address_space.h
@@ -1,6 +1,5 @@
-// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/common/address_space.inc b/src/common/address_space.inc
index 7cfbb150b..a063782b3 100644
--- a/src/common/address_space.inc
+++ b/src/common/address_space.inc
@@ -1,5 +1,5 @@
-// SPDX-License-Identifier: GPLv3 or later
-// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "common/address_space.h"
 #include "common/assert.h"
diff --git a/src/common/multi_level_page_table.cpp b/src/common/multi_level_page_table.cpp
index 3a7a75aa7..46e362f3b 100644
--- a/src/common/multi_level_page_table.cpp
+++ b/src/common/multi_level_page_table.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
 #include "common/multi_level_page_table.inc"
 
 namespace Common {
diff --git a/src/common/multi_level_page_table.h b/src/common/multi_level_page_table.h
index dde1cc962..08092c89a 100644
--- a/src/common/multi_level_page_table.h
+++ b/src/common/multi_level_page_table.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
diff --git a/src/common/multi_level_page_table.inc b/src/common/multi_level_page_table.inc
index 4def6dba8..8ac506fa0 100644
--- a/src/common/multi_level_page_table.inc
+++ b/src/common/multi_level_page_table.inc
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
 
 #ifdef _WIN32
 #include <windows.h>
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index 4175d3d9c..d2a632646 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h
index e069ade4e..5c8b95803 100644
--- a/src/core/hle/service/nvdrv/core/container.h
+++ b/src/core/hle/service/nvdrv/core/container.h
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index 9b21da6b1..e63ec7717 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "common/alignment.h"
 #include "common/assert.h"
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h
index ef2df3ad7..6d6dac023 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp
index fc4ff3c2f..0bb2aec97 100644
--- a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp
+++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "common/assert.h"
 #include "core/hle/service/nvdrv/core/syncpoint_manager.h"
diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.h b/src/core/hle/service/nvdrv/core/syncpoint_manager.h
index da456f206..6b71cd33d 100644
--- a/src/core/hle/service/nvdrv/core/syncpoint_manager.h
+++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.h
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index d1beefba6..b48f7fcaf 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2021 yuzu emulator team, Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include <cstring>
 #include <utility>
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 12e881f0d..86fe71c75 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2021 yuzu emulator team, Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index 7fffb8e48..5bee4a3d3 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2021 yuzu emulator team, Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include <bit>
 #include <cstdlib>
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index f511c0296..4aa738b41 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2021 yuzu emulator team, Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h
index 2ee91f9c4..0e2f47075 100644
--- a/src/core/hle/service/nvdrv/nvdata.h
+++ b/src/core/hle/service/nvdrv/nvdata.h
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 20bf24ec8..7929443d2 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include <utility>
 
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 22836529d..a2aeb80b4 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
index 5e50a04e8..edbdfee43 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include <cinttypes>
 #include "common/logging/log.h"
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
index 3613c4992..b04922ac0 100644
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "common/assert.h"
 #include "video_core/control/channel_state.h"
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h
index 08a7591e1..305b21cba 100644
--- a/src/video_core/control/channel_state.h
+++ b/src/video_core/control/channel_state.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/video_core/control/channel_state_cache.cpp b/src/video_core/control/channel_state_cache.cpp
index ec7ba907c..4ebeb6356 100644
--- a/src/video_core/control/channel_state_cache.cpp
+++ b/src/video_core/control/channel_state_cache.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 #include "video_core/control/channel_state_cache.inc"
 
 namespace VideoCommon {
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
index 102947adb..5246192a8 100644
--- a/src/video_core/control/channel_state_cache.h
+++ b/src/video_core/control/channel_state_cache.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/video_core/control/channel_state_cache.inc b/src/video_core/control/channel_state_cache.inc
index d3ae758b2..460313893 100644
--- a/src/video_core/control/channel_state_cache.inc
+++ b/src/video_core/control/channel_state_cache.inc
@@ -1,3 +1,5 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include <algorithm>
 
diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp
index a9bb00aa7..733042690 100644
--- a/src/video_core/control/scheduler.cpp
+++ b/src/video_core/control/scheduler.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include <memory>
 
diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h
index c1a773946..305a01e0a 100644
--- a/src/video_core/control/scheduler.h
+++ b/src/video_core/control/scheduler.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index c3ed11c13..cca890792 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "common/assert.h"
 #include "common/logging/log.h"
diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h
index b4619e9a8..d4175ee94 100644
--- a/src/video_core/engines/puller.h
+++ b/src/video_core/engines/puller.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/video_core/host1x/control.cpp b/src/video_core/host1x/control.cpp
index a81c635ae..dceefdb7f 100644
--- a/src/video_core/host1x/control.cpp
+++ b/src/video_core/host1x/control.cpp
@@ -1,6 +1,5 @@
-// Copyright 2022 yuzu Emulator Project
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "common/assert.h"
 #include "video_core/host1x/control.h"
diff --git a/src/video_core/host1x/control.h b/src/video_core/host1x/control.h
index 18a9b56c0..e117888a3 100644
--- a/src/video_core/host1x/control.h
+++ b/src/video_core/host1x/control.h
@@ -1,7 +1,6 @@
-// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp
index eb00f4855..7c317a85d 100644
--- a/src/video_core/host1x/host1x.cpp
+++ b/src/video_core/host1x/host1x.cpp
@@ -1,6 +1,5 @@
-// Copyright 2022 yuzu Emulator Project
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "core/core.h"
 #include "video_core/host1x/host1x.h"
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h
index e4b69d75a..7ecf853d9 100644
--- a/src/video_core/host1x/host1x.h
+++ b/src/video_core/host1x/host1x.h
@@ -1,6 +1,5 @@
-// Copyright 2022 yuzu Emulator Project
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp
index 825bd551e..4471bacae 100644
--- a/src/video_core/host1x/syncpoint_manager.cpp
+++ b/src/video_core/host1x/syncpoint_manager.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "common/microprofile.h"
 #include "video_core/host1x/syncpoint_manager.h"
diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h
index 440b1508a..72220a09a 100644
--- a/src/video_core/host1x/syncpoint_manager.h
+++ b/src/video_core/host1x/syncpoint_manager.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
index bc905a1a4..8a9a32f44 100644
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv3 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "video_core/control/channel_state_cache.inc"
 #include "video_core/texture_cache/texture_cache_base.h"
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 9a835cefc..eaf4a1c95 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1,7 +1,5 @@
-// SPDX-FileCopyrightText: 2021 yuzu emulator team
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 2f4db5047..2fa8445eb 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -1,7 +1,5 @@
-// SPDX-FileCopyrightText: 2021 yuzu emulator team
-// (https://github.com/skyline-emu/)
-// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
-// or any later version Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
 
-- 
cgit v1.2.3


From fa342cae227666c861806b9bf63e4286aff1e4d7 Mon Sep 17 00:00:00 2001
From: Morph <39850852+Morph1984@users.noreply.github.com>
Date: Wed, 29 Jun 2022 20:33:04 -0400
Subject: address_space: Address feedback

---
 src/common/address_space.h                         | 105 ++++---
 src/common/address_space.inc                       | 319 +++++++++++----------
 .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp    |   8 +-
 3 files changed, 237 insertions(+), 195 deletions(-)

(limited to 'src/common')

diff --git a/src/common/address_space.h b/src/common/address_space.h
index 5b3832f07..bf649018c 100644
--- a/src/common/address_space.h
+++ b/src/common/address_space.h
@@ -23,9 +23,29 @@ template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa
           bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo = EmptyStruct>
 requires AddressSpaceValid<VaType, AddressSpaceBits>
 class FlatAddressSpaceMap {
-private:
-    std::function<void(VaType, VaType)>
-        unmapCallback{}; //!< Callback called when the mappings in an region have changed
+public:
+    /// The maximum VA that this AS can technically reach
+    static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) +
+                                      ((1ULL << (AddressSpaceBits - 1)) - 1)};
+
+    explicit FlatAddressSpaceMap(VaType va_limit,
+                                 std::function<void(VaType, VaType)> unmap_callback = {});
+
+    FlatAddressSpaceMap() = default;
+
+    void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info = {}) {
+        std::scoped_lock lock(block_mutex);
+        MapLocked(virt, phys, size, extra_info);
+    }
+
+    void Unmap(VaType virt, VaType size) {
+        std::scoped_lock lock(block_mutex);
+        UnmapLocked(virt, size);
+    }
+
+    VaType GetVALimit() const {
+        return va_limit;
+    }
 
 protected:
     /**
@@ -33,68 +53,55 @@ protected:
      * another block with a different phys address is hit
      */
     struct Block {
-        VaType virt{UnmappedVa}; //!< VA of the block
-        PaType phys{UnmappedPa}; //!< PA of the block, will increase 1-1 with VA until a new block
-                                 //!< is encountered
-        [[no_unique_address]] ExtraBlockInfo extraInfo;
+        /// VA of the block
+        VaType virt{UnmappedVa};
+        /// PA of the block, will increase 1-1 with VA until a new block is encountered
+        PaType phys{UnmappedPa};
+        [[no_unique_address]] ExtraBlockInfo extra_info;
 
         Block() = default;
 
-        Block(VaType virt_, PaType phys_, ExtraBlockInfo extraInfo_)
-            : virt(virt_), phys(phys_), extraInfo(extraInfo_) {}
+        Block(VaType virt_, PaType phys_, ExtraBlockInfo extra_info_)
+            : virt(virt_), phys(phys_), extra_info(extra_info_) {}
 
-        constexpr bool Valid() {
+        bool Valid() const {
             return virt != UnmappedVa;
         }
 
-        constexpr bool Mapped() {
+        bool Mapped() const {
             return phys != UnmappedPa;
         }
 
-        constexpr bool Unmapped() {
+        bool Unmapped() const {
             return phys == UnmappedPa;
         }
 
-        bool operator<(const VaType& pVirt) const {
-            return virt < pVirt;
+        bool operator<(const VaType& p_virt) const {
+            return virt < p_virt;
         }
     };
 
-    std::mutex blockMutex;
-    std::vector<Block> blocks{Block{}};
-
     /**
      * @brief Maps a PA range into the given AS region
-     * @note blockMutex MUST be locked when calling this
+     * @note block_mutex MUST be locked when calling this
      */
-    void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo);
+    void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info);
 
     /**
      * @brief Unmaps the given range and merges it with other unmapped regions
-     * @note blockMutex MUST be locked when calling this
+     * @note block_mutex MUST be locked when calling this
      */
     void UnmapLocked(VaType virt, VaType size);
 
-public:
-    static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) +
-                                      ((1ULL << (AddressSpaceBits - 1)) -
-                                       1)}; //!< The maximum VA that this AS can technically reach
-
-    VaType vaLimit{VaMaximum}; //!< A soft limit on the maximum VA of the AS
-
-    FlatAddressSpaceMap(VaType vaLimit, std::function<void(VaType, VaType)> unmapCallback = {});
-
-    FlatAddressSpaceMap() = default;
+    std::mutex block_mutex;
+    std::vector<Block> blocks{Block{}};
 
-    void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo = {}) {
-        std::scoped_lock lock(blockMutex);
-        MapLocked(virt, phys, size, extraInfo);
-    }
+    /// A soft limit on the maximum VA of the AS
+    VaType va_limit{VaMaximum};
 
-    void Unmap(VaType virt, VaType size) {
-        std::scoped_lock lock(blockMutex);
-        UnmapLocked(virt, size);
-    }
+private:
+    /// Callback called when the mappings in a region have changed
+    std::function<void(VaType, VaType)> unmap_callback{};
 };
 
 /**
@@ -108,14 +115,8 @@ class FlatAllocator
 private:
     using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>;
 
-    VaType currentLinearAllocEnd; //!< The end address for the initial linear allocation pass, once
-                                  //!< this reaches the AS limit the slower allocation path will be
-                                  //!< used
-
 public:
-    VaType vaStart; //!< The base VA of the allocator, no allocations will be below this
-
-    FlatAllocator(VaType vaStart, VaType vaLimit = Base::VaMaximum);
+    explicit FlatAllocator(VaType va_start, VaType va_limit = Base::VaMaximum);
 
     /**
      * @brief Allocates a region in the AS of the given size and returns its address
@@ -131,5 +132,19 @@ public:
      * @brief Frees an AS region so it can be used again
      */
     void Free(VaType virt, VaType size);
+
+    VaType GetVAStart() const {
+        return va_start;
+    }
+
+private:
+    /// The base VA of the allocator, no allocations will be below this
+    VaType va_start;
+
+    /**
+     * The end address for the initial linear allocation pass
+     * Once this reaches the AS limit the slower allocation path will be used
+     */
+    VaType current_linear_alloc_end;
 };
 } // namespace Common
diff --git a/src/common/address_space.inc b/src/common/address_space.inc
index a063782b3..3661b298e 100644
--- a/src/common/address_space.inc
+++ b/src/common/address_space.inc
@@ -30,137 +30,151 @@
         FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
 
 namespace Common {
-MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType vaLimit_,
-                                        std::function<void(VaType, VaType)> unmapCallback_)
-    : unmapCallback(std::move(unmapCallback_)), vaLimit(vaLimit_) {
-    if (vaLimit > VaMaximum)
+MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType va_limit_,
+                                        std::function<void(VaType, VaType)> unmap_callback_)
+    : va_limit{va_limit_}, unmap_callback{std::move(unmap_callback_)} {
+    if (va_limit > VaMaximum) {
         UNREACHABLE_MSG("Invalid VA limit!");
+    }
 }
 
-MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo) {
-    VaType virtEnd{virt + size};
+MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info) {
+    VaType virt_end{virt + size};
 
-    if (virtEnd > vaLimit)
-        UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}",
-                        virtEnd, vaLimit);
+    if (virt_end > va_limit) {
+        UNREACHABLE_MSG(
+            "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}", virt_end,
+            va_limit);
+    }
 
-    auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
-    if (blockEndSuccessor == blocks.begin())
-        UNREACHABLE_MSG("Trying to map a block before the VA start: virtEnd: 0x{:X}", virtEnd);
+    auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)};
+    if (block_end_successor == blocks.begin()) {
+        UNREACHABLE_MSG("Trying to map a block before the VA start: virt_end: 0x{:X}", virt_end);
+    }
 
-    auto blockEndPredecessor{std::prev(blockEndSuccessor)};
+    auto block_end_predecessor{std::prev(block_end_successor)};
 
-    if (blockEndSuccessor != blocks.end()) {
+    if (block_end_successor != blocks.end()) {
         // We have blocks in front of us, if one is directly in front then we don't have to add a
         // tail
-        if (blockEndSuccessor->virt != virtEnd) {
+        if (block_end_successor->virt != virt_end) {
             PaType tailPhys{[&]() -> PaType {
                 if constexpr (!PaContigSplit) {
-                    return blockEndPredecessor
-                        ->phys; // Always propagate unmapped regions rather than calculating offset
+                    // Always propagate unmapped regions rather than calculating offset
+                    return block_end_predecessor->phys;
                 } else {
-                    if (blockEndPredecessor->Unmapped())
-                        return blockEndPredecessor->phys; // Always propagate unmapped regions
-                                                          // rather than calculating offset
-                    else
-                        return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
+                    if (block_end_predecessor->Unmapped()) {
+                        // Always propagate unmapped regions rather than calculating offset
+                        return block_end_predecessor->phys;
+                    } else {
+                        return block_end_predecessor->phys + virt_end - block_end_predecessor->virt;
+                    }
                 }
             }()};
 
-            if (blockEndPredecessor->virt >= virt) {
+            if (block_end_predecessor->virt >= virt) {
                 // If this block's start would be overlapped by the map then reuse it as a tail
                 // block
-                blockEndPredecessor->virt = virtEnd;
-                blockEndPredecessor->phys = tailPhys;
-                blockEndPredecessor->extraInfo = blockEndPredecessor->extraInfo;
+                block_end_predecessor->virt = virt_end;
+                block_end_predecessor->phys = tailPhys;
+                block_end_predecessor->extra_info = block_end_predecessor->extra_info;
 
                 // No longer predecessor anymore
-                blockEndSuccessor = blockEndPredecessor--;
+                block_end_successor = block_end_predecessor--;
             } else {
                 // Else insert a new one and we're done
-                blocks.insert(blockEndSuccessor,
-                              {Block(virt, phys, extraInfo),
-                               Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)});
-                if (unmapCallback)
-                    unmapCallback(virt, size);
+                blocks.insert(block_end_successor,
+                              {Block(virt, phys, extra_info),
+                               Block(virt_end, tailPhys, block_end_predecessor->extra_info)});
+                if (unmap_callback) {
+                    unmap_callback(virt, size);
+                }
 
                 return;
             }
         }
     } else {
-        // blockEndPredecessor will always be unmapped as blocks has to be terminated by an unmapped
-        // chunk
-        if (blockEndPredecessor != blocks.begin() && blockEndPredecessor->virt >= virt) {
+        // block_end_predecessor will always be unmapped as blocks has to be terminated by an
+        // unmapped chunk
+        if (block_end_predecessor != blocks.begin() && block_end_predecessor->virt >= virt) {
             // Move the unmapped block start backwards
-            blockEndPredecessor->virt = virtEnd;
+            block_end_predecessor->virt = virt_end;
 
             // No longer predecessor anymore
-            blockEndSuccessor = blockEndPredecessor--;
+            block_end_successor = block_end_predecessor--;
         } else {
             // Else insert a new one and we're done
-            blocks.insert(blockEndSuccessor,
-                          {Block(virt, phys, extraInfo), Block(virtEnd, UnmappedPa, {})});
-            if (unmapCallback)
-                unmapCallback(virt, size);
+            blocks.insert(block_end_successor,
+                          {Block(virt, phys, extra_info), Block(virt_end, UnmappedPa, {})});
+            if (unmap_callback) {
+                unmap_callback(virt, size);
+            }
 
             return;
         }
     }
 
-    auto blockStartSuccessor{blockEndSuccessor};
+    auto block_start_successor{block_end_successor};
 
     // Walk the block vector to find the start successor as this is more efficient than another
     // binary search in most scenarios
-    while (std::prev(blockStartSuccessor)->virt >= virt)
-        blockStartSuccessor--;
+    while (std::prev(block_start_successor)->virt >= virt) {
+        block_start_successor--;
+    }
 
     // Check that the start successor is either the end block or something in between
-    if (blockStartSuccessor->virt > virtEnd) {
-        UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt);
-    } else if (blockStartSuccessor->virt == virtEnd) {
+    if (block_start_successor->virt > virt_end) {
+        UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt);
+    } else if (block_start_successor->virt == virt_end) {
         // We need to create a new block as there are none spare that we would overwrite
-        blocks.insert(blockStartSuccessor, Block(virt, phys, extraInfo));
+        blocks.insert(block_start_successor, Block(virt, phys, extra_info));
     } else {
         // Erase overwritten blocks
-        if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor)
-            blocks.erase(eraseStart, blockEndSuccessor);
+        if (auto eraseStart{std::next(block_start_successor)}; eraseStart != block_end_successor) {
+            blocks.erase(eraseStart, block_end_successor);
+        }
 
         // Reuse a block that would otherwise be overwritten as a start block
-        blockStartSuccessor->virt = virt;
-        blockStartSuccessor->phys = phys;
-        blockStartSuccessor->extraInfo = extraInfo;
+        block_start_successor->virt = virt;
+        block_start_successor->phys = phys;
+        block_start_successor->extra_info = extra_info;
     }
 
-    if (unmapCallback)
-        unmapCallback(virt, size);
+    if (unmap_callback) {
+        unmap_callback(virt, size);
+    }
 }
 
 MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
-    VaType virtEnd{virt + size};
+    VaType virt_end{virt + size};
 
-    if (virtEnd > vaLimit)
-        UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}",
-                        virtEnd, vaLimit);
+    if (virt_end > va_limit) {
+        UNREACHABLE_MSG(
+            "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}", virt_end,
+            va_limit);
+    }
 
-    auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
-    if (blockEndSuccessor == blocks.begin())
-        UNREACHABLE_MSG("Trying to unmap a block before the VA start: virtEnd: 0x{:X}", virtEnd);
+    auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)};
+    if (block_end_successor == blocks.begin()) {
+        UNREACHABLE_MSG("Trying to unmap a block before the VA start: virt_end: 0x{:X}", virt_end);
+    }
 
-    auto blockEndPredecessor{std::prev(blockEndSuccessor)};
+    auto block_end_predecessor{std::prev(block_end_successor)};
 
-    auto walkBackToPredecessor{[&](auto iter) {
-        while (iter->virt >= virt)
+    auto walk_back_to_predecessor{[&](auto iter) {
+        while (iter->virt >= virt) {
             iter--;
+        }
 
         return iter;
     }};
 
-    auto eraseBlocksWithEndUnmapped{[&](auto unmappedEnd) {
-        auto blockStartPredecessor{walkBackToPredecessor(unmappedEnd)};
-        auto blockStartSuccessor{std::next(blockStartPredecessor)};
+    auto erase_blocks_with_end_unmapped{[&](auto unmappedEnd) {
+        auto block_start_predecessor{walk_back_to_predecessor(unmappedEnd)};
+        auto block_start_successor{std::next(block_start_predecessor)};
 
         auto eraseEnd{[&]() {
-            if (blockStartPredecessor->Unmapped()) {
+            if (block_start_predecessor->Unmapped()) {
                 // If the start predecessor is unmapped then we can erase everything in our region
                 // and be done
                 return std::next(unmappedEnd);
@@ -174,158 +188,171 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
 
         // We can't have two unmapped regions after each other
         if (eraseEnd != blocks.end() &&
-            (eraseEnd == blockStartSuccessor ||
-             (blockStartPredecessor->Unmapped() && eraseEnd->Unmapped())))
+            (eraseEnd == block_start_successor ||
+             (block_start_predecessor->Unmapped() && eraseEnd->Unmapped()))) {
             UNREACHABLE_MSG("Multiple contiguous unmapped regions are unsupported!");
+        }
 
-        blocks.erase(blockStartSuccessor, eraseEnd);
+        blocks.erase(block_start_successor, eraseEnd);
     }};
 
     // We can avoid any splitting logic if these are the case
-    if (blockEndPredecessor->Unmapped()) {
-        if (blockEndPredecessor->virt > virt)
-            eraseBlocksWithEndUnmapped(blockEndPredecessor);
+    if (block_end_predecessor->Unmapped()) {
+        if (block_end_predecessor->virt > virt) {
+            erase_blocks_with_end_unmapped(block_end_predecessor);
+        }
 
-        if (unmapCallback)
-            unmapCallback(virt, size);
+        if (unmap_callback) {
+            unmap_callback(virt, size);
+        }
 
         return; // The region is unmapped, bail out early
-    } else if (blockEndSuccessor->virt == virtEnd && blockEndSuccessor->Unmapped()) {
-        eraseBlocksWithEndUnmapped(blockEndSuccessor);
+    } else if (block_end_successor->virt == virt_end && block_end_successor->Unmapped()) {
+        erase_blocks_with_end_unmapped(block_end_successor);
 
-        if (unmapCallback)
-            unmapCallback(virt, size);
+        if (unmap_callback) {
+            unmap_callback(virt, size);
+        }
 
         return; // The region is unmapped here and doesn't need splitting, bail out early
-    } else if (blockEndSuccessor == blocks.end()) {
+    } else if (block_end_successor == blocks.end()) {
         // This should never happen as the end should always follow an unmapped block
         UNREACHABLE_MSG("Unexpected Memory Manager state!");
-    } else if (blockEndSuccessor->virt != virtEnd) {
+    } else if (block_end_successor->virt != virt_end) {
         // If one block is directly in front then we don't have to add a tail
 
         // The previous block is mapped so we will need to add a tail with an offset
         PaType tailPhys{[&]() {
-            if constexpr (PaContigSplit)
-                return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
-            else
-                return blockEndPredecessor->phys;
+            if constexpr (PaContigSplit) {
+                return block_end_predecessor->phys + virt_end - block_end_predecessor->virt;
+            } else {
+                return block_end_predecessor->phys;
+            }
         }()};
 
-        if (blockEndPredecessor->virt >= virt) {
+        if (block_end_predecessor->virt >= virt) {
             // If this block's start would be overlapped by the unmap then reuse it as a tail block
-            blockEndPredecessor->virt = virtEnd;
-            blockEndPredecessor->phys = tailPhys;
+            block_end_predecessor->virt = virt_end;
+            block_end_predecessor->phys = tailPhys;
 
             // No longer predecessor anymore
-            blockEndSuccessor = blockEndPredecessor--;
+            block_end_successor = block_end_predecessor--;
         } else {
-            blocks.insert(blockEndSuccessor,
+            blocks.insert(block_end_successor,
                           {Block(virt, UnmappedPa, {}),
-                           Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)});
-            if (unmapCallback)
-                unmapCallback(virt, size);
+                           Block(virt_end, tailPhys, block_end_predecessor->extra_info)});
+            if (unmap_callback) {
+                unmap_callback(virt, size);
+            }
 
-            return; // The previous block is mapped and ends before
+            // The previous block is mapped and ends before
+            return;
         }
     }
 
     // Walk the block vector to find the start predecessor as this is more efficient than another
     // binary search in most scenarios
-    auto blockStartPredecessor{walkBackToPredecessor(blockEndSuccessor)};
-    auto blockStartSuccessor{std::next(blockStartPredecessor)};
+    auto block_start_predecessor{walk_back_to_predecessor(block_end_successor)};
+    auto block_start_successor{std::next(block_start_predecessor)};
 
-    if (blockStartSuccessor->virt > virtEnd) {
-        UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt);
-    } else if (blockStartSuccessor->virt == virtEnd) {
+    if (block_start_successor->virt > virt_end) {
+        UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt);
+    } else if (block_start_successor->virt == virt_end) {
         // There are no blocks between the start and the end that would let us skip inserting a new
         // one for head
 
         // The previous block is may be unmapped, if so we don't need to insert any unmaps after it
-        if (blockStartPredecessor->Mapped())
-            blocks.insert(blockStartSuccessor, Block(virt, UnmappedPa, {}));
-    } else if (blockStartPredecessor->Unmapped()) {
+        if (block_start_predecessor->Mapped()) {
+            blocks.insert(block_start_successor, Block(virt, UnmappedPa, {}));
+        }
+    } else if (block_start_predecessor->Unmapped()) {
         // If the previous block is unmapped
-        blocks.erase(blockStartSuccessor, blockEndPredecessor);
+        blocks.erase(block_start_successor, block_end_predecessor);
     } else {
         // Erase overwritten blocks, skipping the first one as we have written the unmapped start
         // block there
-        if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor)
-            blocks.erase(eraseStart, blockEndSuccessor);
+        if (auto eraseStart{std::next(block_start_successor)}; eraseStart != block_end_successor) {
+            blocks.erase(eraseStart, block_end_successor);
+        }
 
         // Add in the unmapped block header
-        blockStartSuccessor->virt = virt;
-        blockStartSuccessor->phys = UnmappedPa;
+        block_start_successor->virt = virt;
+        block_start_successor->phys = UnmappedPa;
     }
 
-    if (unmapCallback)
-        unmapCallback(virt, size);
+    if (unmap_callback)
+        unmap_callback(virt, size);
 }
 
-ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart_, VaType vaLimit_)
-    : Base(vaLimit_), currentLinearAllocEnd(vaStart_), vaStart(vaStart_) {}
+ALLOC_MEMBER_CONST()::FlatAllocator(VaType va_start_, VaType va_limit_)
+    : Base{va_limit_}, va_start{va_start_}, current_linear_alloc_end{va_start_} {}
 
 ALLOC_MEMBER(VaType)::Allocate(VaType size) {
-    std::scoped_lock lock(this->blockMutex);
+    std::scoped_lock lock(this->block_mutex);
 
-    VaType allocStart{UnmappedVa};
-    VaType allocEnd{currentLinearAllocEnd + size};
+    VaType alloc_start{UnmappedVa};
+    VaType alloc_end{current_linear_alloc_end + size};
 
     // Avoid searching backwards in the address space if possible
-    if (allocEnd >= currentLinearAllocEnd && allocEnd <= this->vaLimit) {
-        auto allocEndSuccessor{
-            std::lower_bound(this->blocks.begin(), this->blocks.end(), allocEnd)};
-        if (allocEndSuccessor == this->blocks.begin())
+    if (alloc_end >= current_linear_alloc_end && alloc_end <= this->va_limit) {
+        auto alloc_end_successor{
+            std::lower_bound(this->blocks.begin(), this->blocks.end(), alloc_end)};
+        if (alloc_end_successor == this->blocks.begin()) {
             UNREACHABLE_MSG("First block in AS map is invalid!");
+        }
 
-        auto allocEndPredecessor{std::prev(allocEndSuccessor)};
-        if (allocEndPredecessor->virt <= currentLinearAllocEnd) {
-            allocStart = currentLinearAllocEnd;
+        auto alloc_end_predecessor{std::prev(alloc_end_successor)};
+        if (alloc_end_predecessor->virt <= current_linear_alloc_end) {
+            alloc_start = current_linear_alloc_end;
         } else {
             // Skip over fixed any mappings in front of us
-            while (allocEndSuccessor != this->blocks.end()) {
-                if (allocEndSuccessor->virt - allocEndPredecessor->virt < size ||
-                    allocEndPredecessor->Mapped()) {
-                    allocStart = allocEndPredecessor->virt;
+            while (alloc_end_successor != this->blocks.end()) {
+                if (alloc_end_successor->virt - alloc_end_predecessor->virt < size ||
+                    alloc_end_predecessor->Mapped()) {
+                    alloc_start = alloc_end_predecessor->virt;
                     break;
                 }
 
-                allocEndPredecessor = allocEndSuccessor++;
+                alloc_end_predecessor = alloc_end_successor++;
 
                 // Use the VA limit to calculate if we can fit in the final block since it has no
                 // successor
-                if (allocEndSuccessor == this->blocks.end()) {
-                    allocEnd = allocEndPredecessor->virt + size;
+                if (alloc_end_successor == this->blocks.end()) {
+                    alloc_end = alloc_end_predecessor->virt + size;
 
-                    if (allocEnd >= allocEndPredecessor->virt && allocEnd <= this->vaLimit)
-                        allocStart = allocEndPredecessor->virt;
+                    if (alloc_end >= alloc_end_predecessor->virt && alloc_end <= this->va_limit) {
+                        alloc_start = alloc_end_predecessor->virt;
+                    }
                 }
             }
         }
     }
 
-    if (allocStart != UnmappedVa) {
-        currentLinearAllocEnd = allocStart + size;
+    if (alloc_start != UnmappedVa) {
+        current_linear_alloc_end = alloc_start + size;
     } else { // If linear allocation overflows the AS then find a gap
-        if (this->blocks.size() <= 2)
+        if (this->blocks.size() <= 2) {
             UNREACHABLE_MSG("Unexpected allocator state!");
+        }
 
-        auto searchPredecessor{this->blocks.begin()};
-        auto searchSuccessor{std::next(searchPredecessor)};
+        auto search_predecessor{this->blocks.begin()};
+        auto search_successor{std::next(search_predecessor)};
 
-        while (searchSuccessor != this->blocks.end() &&
-               (searchSuccessor->virt - searchPredecessor->virt < size ||
-                searchPredecessor->Mapped())) {
-            searchPredecessor = searchSuccessor++;
+        while (search_successor != this->blocks.end() &&
+               (search_successor->virt - search_predecessor->virt < size ||
+                search_predecessor->Mapped())) {
+            search_predecessor = search_successor++;
         }
 
-        if (searchSuccessor != this->blocks.end())
-            allocStart = searchPredecessor->virt;
-        else
+        if (search_successor != this->blocks.end()) {
+            alloc_start = search_predecessor->virt;
+        } else {
             return {}; // AS is full
+        }
     }
 
-    this->MapLocked(allocStart, true, size, {});
-    return allocStart;
+    this->MapLocked(alloc_start, true, size, {});
+    return alloc_start;
 }
 
 ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index b48f7fcaf..7a95f5305 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -472,16 +472,16 @@ void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) {
 
     params.regions = std::array<VaRegion, 2>{
         VaRegion{
-            .offset = vm.small_page_allocator->vaStart << VM::PAGE_SIZE_BITS,
+            .offset = vm.small_page_allocator->GetVAStart() << VM::PAGE_SIZE_BITS,
             .page_size = VM::YUZU_PAGESIZE,
             ._pad0_{},
-            .pages = vm.small_page_allocator->vaLimit - vm.small_page_allocator->vaStart,
+            .pages = vm.small_page_allocator->GetVALimit() - vm.small_page_allocator->GetVAStart(),
         },
         VaRegion{
-            .offset = vm.big_page_allocator->vaStart << vm.big_page_size_bits,
+            .offset = vm.big_page_allocator->GetVAStart() << vm.big_page_size_bits,
             .page_size = vm.big_page_size,
             ._pad0_{},
-            .pages = vm.big_page_allocator->vaLimit - vm.big_page_allocator->vaStart,
+            .pages = vm.big_page_allocator->GetVALimit() - vm.big_page_allocator->GetVAStart(),
         },
     };
 }
-- 
cgit v1.2.3


From 11e1cbbdbde8269e7cdb0e150f25639223bdd3e6 Mon Sep 17 00:00:00 2001
From: Morph <39850852+Morph1984@users.noreply.github.com>
Date: Wed, 29 Jun 2022 20:36:39 -0400
Subject: address_space: Rename va_start to virt_start

Avoids conflicting with the va_start macro
---
 src/common/address_space.h   | 6 +++---
 src/common/address_space.inc | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/common')

diff --git a/src/common/address_space.h b/src/common/address_space.h
index bf649018c..9222b2fdc 100644
--- a/src/common/address_space.h
+++ b/src/common/address_space.h
@@ -116,7 +116,7 @@ private:
     using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>;
 
 public:
-    explicit FlatAllocator(VaType va_start, VaType va_limit = Base::VaMaximum);
+    explicit FlatAllocator(VaType virt_start, VaType va_limit = Base::VaMaximum);
 
     /**
      * @brief Allocates a region in the AS of the given size and returns its address
@@ -134,12 +134,12 @@ public:
     void Free(VaType virt, VaType size);
 
     VaType GetVAStart() const {
-        return va_start;
+        return virt_start;
     }
 
 private:
     /// The base VA of the allocator, no allocations will be below this
-    VaType va_start;
+    VaType virt_start;
 
     /**
      * The end address for the initial linear allocation pass
diff --git a/src/common/address_space.inc b/src/common/address_space.inc
index 3661b298e..9f957c81d 100644
--- a/src/common/address_space.inc
+++ b/src/common/address_space.inc
@@ -284,8 +284,8 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
         unmap_callback(virt, size);
 }
 
-ALLOC_MEMBER_CONST()::FlatAllocator(VaType va_start_, VaType va_limit_)
-    : Base{va_limit_}, va_start{va_start_}, current_linear_alloc_end{va_start_} {}
+ALLOC_MEMBER_CONST()::FlatAllocator(VaType virt_start_, VaType va_limit_)
+    : Base{va_limit_}, virt_start{virt_start_}, current_linear_alloc_end{virt_start_} {}
 
 ALLOC_MEMBER(VaType)::Allocate(VaType size) {
     std::scoped_lock lock(this->block_mutex);
-- 
cgit v1.2.3


From c80ed6d81fef5858508ac4b841defe8ee3a8663d Mon Sep 17 00:00:00 2001
From: Liam <byteslice@airmail.cc>
Date: Fri, 19 Aug 2022 21:58:25 -0400
Subject: general: rework usages of UNREACHABLE macro

---
 src/common/address_space.inc                       | 31 +++++++++++-----------
 .../hle/service/nvdrv/core/syncpoint_manager.cpp   | 14 +++++-----
 .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp    | 10 +++----
 3 files changed, 28 insertions(+), 27 deletions(-)

(limited to 'src/common')

diff --git a/src/common/address_space.inc b/src/common/address_space.inc
index 9f957c81d..2195dabd5 100644
--- a/src/common/address_space.inc
+++ b/src/common/address_space.inc
@@ -34,7 +34,7 @@ MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType va_limit_,
                                         std::function<void(VaType, VaType)> unmap_callback_)
     : va_limit{va_limit_}, unmap_callback{std::move(unmap_callback_)} {
     if (va_limit > VaMaximum) {
-        UNREACHABLE_MSG("Invalid VA limit!");
+        ASSERT_MSG(false, "Invalid VA limit!");
     }
 }
 
@@ -42,14 +42,14 @@ MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInf
     VaType virt_end{virt + size};
 
     if (virt_end > va_limit) {
-        UNREACHABLE_MSG(
-            "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}", virt_end,
-            va_limit);
+        ASSERT_MSG(false,
+                   "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}",
+                   virt_end, va_limit);
     }
 
     auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)};
     if (block_end_successor == blocks.begin()) {
-        UNREACHABLE_MSG("Trying to map a block before the VA start: virt_end: 0x{:X}", virt_end);
+        ASSERT_MSG(false, "Trying to map a block before the VA start: virt_end: 0x{:X}", virt_end);
     }
 
     auto block_end_predecessor{std::prev(block_end_successor)};
@@ -124,7 +124,7 @@ MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInf
 
     // Check that the start successor is either the end block or something in between
     if (block_start_successor->virt > virt_end) {
-        UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt);
+        ASSERT_MSG(false, "Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt);
     } else if (block_start_successor->virt == virt_end) {
         // We need to create a new block as there are none spare that we would overwrite
         blocks.insert(block_start_successor, Block(virt, phys, extra_info));
@@ -149,14 +149,15 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
     VaType virt_end{virt + size};
 
     if (virt_end > va_limit) {
-        UNREACHABLE_MSG(
-            "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}", virt_end,
-            va_limit);
+        ASSERT_MSG(false,
+                   "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}",
+                   virt_end, va_limit);
     }
 
     auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)};
     if (block_end_successor == blocks.begin()) {
-        UNREACHABLE_MSG("Trying to unmap a block before the VA start: virt_end: 0x{:X}", virt_end);
+        ASSERT_MSG(false, "Trying to unmap a block before the VA start: virt_end: 0x{:X}",
+                   virt_end);
     }
 
     auto block_end_predecessor{std::prev(block_end_successor)};
@@ -190,7 +191,7 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
         if (eraseEnd != blocks.end() &&
             (eraseEnd == block_start_successor ||
              (block_start_predecessor->Unmapped() && eraseEnd->Unmapped()))) {
-            UNREACHABLE_MSG("Multiple contiguous unmapped regions are unsupported!");
+            ASSERT_MSG(false, "Multiple contiguous unmapped regions are unsupported!");
         }
 
         blocks.erase(block_start_successor, eraseEnd);
@@ -217,7 +218,7 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
         return; // The region is unmapped here and doesn't need splitting, bail out early
     } else if (block_end_successor == blocks.end()) {
         // This should never happen as the end should always follow an unmapped block
-        UNREACHABLE_MSG("Unexpected Memory Manager state!");
+        ASSERT_MSG(false, "Unexpected Memory Manager state!");
     } else if (block_end_successor->virt != virt_end) {
         // If one block is directly in front then we don't have to add a tail
 
@@ -256,7 +257,7 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
     auto block_start_successor{std::next(block_start_predecessor)};
 
     if (block_start_successor->virt > virt_end) {
-        UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt);
+        ASSERT_MSG(false, "Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt);
     } else if (block_start_successor->virt == virt_end) {
         // There are no blocks between the start and the end that would let us skip inserting a new
         // one for head
@@ -298,7 +299,7 @@ ALLOC_MEMBER(VaType)::Allocate(VaType size) {
         auto alloc_end_successor{
             std::lower_bound(this->blocks.begin(), this->blocks.end(), alloc_end)};
         if (alloc_end_successor == this->blocks.begin()) {
-            UNREACHABLE_MSG("First block in AS map is invalid!");
+            ASSERT_MSG(false, "First block in AS map is invalid!");
         }
 
         auto alloc_end_predecessor{std::prev(alloc_end_successor)};
@@ -332,7 +333,7 @@ ALLOC_MEMBER(VaType)::Allocate(VaType size) {
         current_linear_alloc_end = alloc_start + size;
     } else { // If linear allocation overflows the AS then find a gap
         if (this->blocks.size() <= 2) {
-            UNREACHABLE_MSG("Unexpected allocator state!");
+            ASSERT_MSG(false, "Unexpected allocator state!");
         }
 
         auto search_predecessor{this->blocks.begin()};
diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp
index 0bb2aec97..072b3a22f 100644
--- a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp
+++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp
@@ -29,7 +29,7 @@ SyncpointManager::~SyncpointManager() = default;
 
 u32 SyncpointManager::ReserveSyncpoint(u32 id, bool clientManaged) {
     if (syncpoints.at(id).reserved) {
-        UNREACHABLE_MSG("Requested syncpoint is in use");
+        ASSERT_MSG(false, "Requested syncpoint is in use");
         return 0;
     }
 
@@ -45,7 +45,7 @@ u32 SyncpointManager::FindFreeSyncpoint() {
             return i;
         }
     }
-    UNREACHABLE_MSG("Failed to find a free syncpoint!");
+    ASSERT_MSG(false, "Failed to find a free syncpoint!");
     return 0;
 }
 
@@ -68,7 +68,7 @@ bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) {
     const SyncpointInfo& syncpoint{syncpoints.at(id)};
 
     if (!syncpoint.reserved) {
-        UNREACHABLE();
+        ASSERT(false);
         return 0;
     }
 
@@ -83,7 +83,7 @@ bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) {
 
 u32 SyncpointManager::IncrementSyncpointMaxExt(u32 id, u32 amount) {
     if (!syncpoints.at(id).reserved) {
-        UNREACHABLE();
+        ASSERT(false);
         return 0;
     }
 
@@ -92,7 +92,7 @@ u32 SyncpointManager::IncrementSyncpointMaxExt(u32 id, u32 amount) {
 
 u32 SyncpointManager::ReadSyncpointMinValue(u32 id) {
     if (!syncpoints.at(id).reserved) {
-        UNREACHABLE();
+        ASSERT(false);
         return 0;
     }
 
@@ -101,7 +101,7 @@ u32 SyncpointManager::ReadSyncpointMinValue(u32 id) {
 
 u32 SyncpointManager::UpdateMin(u32 id) {
     if (!syncpoints.at(id).reserved) {
-        UNREACHABLE();
+        ASSERT(false);
         return 0;
     }
 
@@ -111,7 +111,7 @@ u32 SyncpointManager::UpdateMin(u32 id) {
 
 NvFence SyncpointManager::GetSyncpointFence(u32 id) {
     if (!syncpoints.at(id).reserved) {
-        UNREACHABLE();
+        ASSERT(false);
         return NvFence{};
     }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 7a95f5305..192503ffc 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -96,7 +96,7 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
     std::scoped_lock lock(mutex);
 
     if (vm.initialised) {
-        UNREACHABLE_MSG("Cannot initialise an address space twice!");
+        ASSERT_MSG(false, "Cannot initialise an address space twice!");
         return NvResult::InvalidState;
     }
 
@@ -174,7 +174,7 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
     } else {
         params.offset = static_cast<u64>(allocator.Allocate(params.pages)) << page_size_bits;
         if (!params.offset) {
-            UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!");
+            ASSERT_MSG(false, "Failed to allocate free space in the GPU AS!");
             return NvResult::InsufficientMemory;
         }
     }
@@ -372,7 +372,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
         else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE))
             return false;
         else {
-            UNREACHABLE();
+            ASSERT(false);
             return false;
         }
     }()};
@@ -382,7 +382,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
 
         if (alloc-- == allocation_map.begin() ||
             (params.offset - alloc->first) + size > alloc->second.size) {
-            UNREACHABLE_MSG("Cannot perform a fixed mapping into an unallocated region!");
+            ASSERT_MSG(false, "Cannot perform a fixed mapping into an unallocated region!");
             return NvResult::BadValue;
         }
 
@@ -403,7 +403,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
                             static_cast<u32>(Common::AlignUp(size, page_size) >> page_size_bits)))
                         << page_size_bits;
         if (!params.offset) {
-            UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!");
+            ASSERT_MSG(false, "Failed to allocate free space in the GPU AS!");
             return NvResult::InsufficientMemory;
         }
 
-- 
cgit v1.2.3


From ca3db0d7c94a20668781830ff852dbf512598efb Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 1 Sep 2022 05:45:22 +0200
Subject: General: address feedback

---
 src/common/multi_level_page_table.h                |  8 +--
 src/core/hle/service/nvdrv/core/container.cpp      | 12 ++++-
 src/core/hle/service/nvdrv/core/container.h        | 23 ++++++---
 src/core/hle/service/nvdrv/core/nvmap.cpp          |  4 +-
 src/core/hle/service/nvdrv/core/nvmap.h            | 59 +++++++++++-----------
 .../hle/service/nvdrv/core/syncpoint_manager.cpp   | 32 ++++++------
 .../hle/service/nvdrv/core/syncpoint_manager.h     | 34 ++++++-------
 .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp    | 28 +++++-----
 src/core/hle/service/nvdrv/devices/nvhost_ctrl.h   |  2 +-
 src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp  |  2 +-
 .../hle/service/nvdrv/devices/nvhost_nvdec.cpp     | 13 ++---
 src/core/hle/service/nvdrv/devices/nvhost_nvdec.h  |  3 --
 .../service/nvdrv/devices/nvhost_nvdec_common.cpp  | 13 ++---
 .../service/nvdrv/devices/nvhost_nvdec_common.h    |  5 --
 src/core/hle/service/nvdrv/devices/nvhost_vic.cpp  | 16 +++---
 src/core/hle/service/nvdrv/devices/nvhost_vic.h    |  3 --
 src/core/hle/service/nvdrv/devices/nvmap.h         |  4 +-
 src/core/hle/service/nvdrv/nvdrv.cpp               |  4 +-
 src/core/hle/service/nvdrv/nvdrv.h                 |  2 +-
 src/video_core/control/channel_state.cpp           |  7 +--
 src/video_core/control/channel_state.h             |  4 +-
 src/video_core/control/channel_state_cache.h       |  2 +-
 src/video_core/control/scheduler.cpp               |  6 ++-
 src/video_core/control/scheduler.h                 |  2 +-
 src/video_core/engines/maxwell_dma.h               |  2 +-
 src/video_core/gpu.cpp                             |  2 +-
 src/video_core/host1x/host1x.h                     |  2 +-
 src/video_core/host1x/syncpoint_manager.cpp        | 12 ++---
 src/video_core/host1x/syncpoint_manager.h          | 24 ++++-----
 src/video_core/memory_manager.h                    |  2 +-
 30 files changed, 167 insertions(+), 165 deletions(-)

(limited to 'src/common')

diff --git a/src/common/multi_level_page_table.h b/src/common/multi_level_page_table.h
index 08092c89a..31f6676a0 100644
--- a/src/common/multi_level_page_table.h
+++ b/src/common/multi_level_page_table.h
@@ -46,19 +46,19 @@ public:
 
     void ReserveRange(u64 start, std::size_t size);
 
-    [[nodiscard]] constexpr const BaseAddr& operator[](std::size_t index) const {
+    [[nodiscard]] const BaseAddr& operator[](std::size_t index) const {
         return base_ptr[index];
     }
 
-    [[nodiscard]] constexpr BaseAddr& operator[](std::size_t index) {
+    [[nodiscard]] BaseAddr& operator[](std::size_t index) {
         return base_ptr[index];
     }
 
-    [[nodiscard]] constexpr BaseAddr* data() {
+    [[nodiscard]] BaseAddr* data() {
         return base_ptr;
     }
 
-    [[nodiscard]] constexpr const BaseAddr* data() const {
+    [[nodiscard]] const BaseAddr* data() const {
         return base_ptr;
     }
 
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index d2a632646..37ca24f5d 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -10,9 +10,11 @@
 namespace Service::Nvidia::NvCore {
 
 struct ContainerImpl {
-    ContainerImpl(Tegra::Host1x::Host1x& host1x_) : file{host1x_}, manager{host1x_} {}
+    explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_)
+        : file{host1x_}, manager{host1x_}, device_file_data{} {}
     NvMap file;
     SyncpointManager manager;
+    Container::Host1xDeviceFileData device_file_data;
 };
 
 Container::Container(Tegra::Host1x::Host1x& host1x_) {
@@ -29,6 +31,14 @@ const NvMap& Container::GetNvMapFile() const {
     return impl->file;
 }
 
+Container::Host1xDeviceFileData& Container::Host1xDeviceFile() {
+    return impl->device_file_data;
+}
+
+const Container::Host1xDeviceFileData& Container::Host1xDeviceFile() const {
+    return impl->device_file_data;
+}
+
 SyncpointManager& Container::GetSyncpointManager() {
     return impl->manager;
 }
diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h
index 5c8b95803..b4b63ac90 100644
--- a/src/core/hle/service/nvdrv/core/container.h
+++ b/src/core/hle/service/nvdrv/core/container.h
@@ -4,15 +4,15 @@
 
 #pragma once
 
+#include <deque>
 #include <memory>
+#include <unordered_map>
 
-namespace Tegra {
+#include "core/hle/service/nvdrv/nvdata.h"
 
-namespace Host1x {
+namespace Tegra::Host1x {
 class Host1x;
-} // namespace Host1x
-
-} // namespace Tegra
+} // namespace Tegra::Host1x
 
 namespace Service::Nvidia::NvCore {
 
@@ -23,7 +23,7 @@ struct ContainerImpl;
 
 class Container {
 public:
-    Container(Tegra::Host1x::Host1x& host1x);
+    explicit Container(Tegra::Host1x::Host1x& host1x);
     ~Container();
 
     NvMap& GetNvMapFile();
@@ -34,6 +34,17 @@ public:
 
     const SyncpointManager& GetSyncpointManager() const;
 
+    struct Host1xDeviceFileData {
+        std::unordered_map<DeviceFD, u32> fd_to_id{};
+        std::deque<u32> syncpts_accumulated{};
+        u32 nvdec_next_id{};
+        u32 vic_next_id{};
+    };
+
+    Host1xDeviceFileData& Host1xDeviceFile();
+
+    const Host1xDeviceFileData& Host1xDeviceFile() const;
+
 private:
     std::unique_ptr<ContainerImpl> impl;
 };
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index e63ec7717..fbd8a74a5 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -119,7 +119,7 @@ std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) {
     std::scoped_lock lock(handles_lock);
     try {
         return handles.at(handle);
-    } catch ([[maybe_unused]] std::out_of_range& e) {
+    } catch (std::out_of_range&) {
         return nullptr;
     }
 }
@@ -128,7 +128,7 @@ VAddr NvMap::GetHandleAddress(Handle::Id handle) {
     std::scoped_lock lock(handles_lock);
     try {
         return handles.at(handle)->address;
-    } catch ([[maybe_unused]] std::out_of_range& e) {
+    } catch (std::out_of_range&) {
         return 0;
     }
 }
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h
index 6d6dac023..b9dd3801f 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -98,35 +98,6 @@ public:
         }
     };
 
-private:
-    std::list<std::shared_ptr<Handle>> unmap_queue{};
-    std::mutex unmap_queue_lock{}; //!< Protects access to `unmap_queue`
-
-    std::unordered_map<Handle::Id, std::shared_ptr<Handle>>
-        handles{};           //!< Main owning map of handles
-    std::mutex handles_lock; //!< Protects access to `handles`
-
-    static constexpr u32 HandleIdIncrement{
-        4}; //!< Each new handle ID is an increment of 4 from the previous
-    std::atomic<u32> next_handle_id{HandleIdIncrement};
-    Tegra::Host1x::Host1x& host1x;
-
-    void AddHandle(std::shared_ptr<Handle> handle);
-
-    /**
-     * @brief Unmaps and frees the SMMU memory region a handle is mapped to
-     * @note Both `unmap_queue_lock` and `handle_description.mutex` MUST be locked when calling this
-     */
-    void UnmapHandle(Handle& handle_description);
-
-    /**
-     * @brief Removes a handle from the map taking its dupes into account
-     * @note handle_description.mutex MUST be locked when calling this
-     * @return If the handle was removed from the map
-     */
-    bool TryRemoveHandle(const Handle& handle_description);
-
-public:
     /**
      * @brief Encapsulates the result of a FreeHandle operation
      */
@@ -136,7 +107,7 @@ public:
         bool was_uncached; //!< If the handle was allocated as uncached
     };
 
-    NvMap(Tegra::Host1x::Host1x& host1x);
+    explicit NvMap(Tegra::Host1x::Host1x& host1x);
 
     /**
      * @brief Creates an unallocated handle of the given size
@@ -172,5 +143,33 @@ public:
      * describing the prior state of the handle
      */
     std::optional<FreeInfo> FreeHandle(Handle::Id handle, bool internal_session);
+
+private:
+    std::list<std::shared_ptr<Handle>> unmap_queue{};
+    std::mutex unmap_queue_lock{}; //!< Protects access to `unmap_queue`
+
+    std::unordered_map<Handle::Id, std::shared_ptr<Handle>>
+        handles{};           //!< Main owning map of handles
+    std::mutex handles_lock; //!< Protects access to `handles`
+
+    static constexpr u32 HandleIdIncrement{
+        4}; //!< Each new handle ID is an increment of 4 from the previous
+    std::atomic<u32> next_handle_id{HandleIdIncrement};
+    Tegra::Host1x::Host1x& host1x;
+
+    void AddHandle(std::shared_ptr<Handle> handle);
+
+    /**
+     * @brief Unmaps and frees the SMMU memory region a handle is mapped to
+     * @note Both `unmap_queue_lock` and `handle_description.mutex` MUST be locked when calling this
+     */
+    void UnmapHandle(Handle& handle_description);
+
+    /**
+     * @brief Removes a handle from the map taking its dupes into account
+     * @note handle_description.mutex MUST be locked when calling this
+     * @return If the handle was removed from the map
+     */
+    bool TryRemoveHandle(const Handle& handle_description);
 };
 } // namespace Service::Nvidia::NvCore
diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp
index 072b3a22f..eda2041a0 100644
--- a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp
+++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp
@@ -18,23 +18,23 @@ SyncpointManager::SyncpointManager(Tegra::Host1x::Host1x& host1x_) : host1x{host
     ReserveSyncpoint(VBlank0SyncpointId, true);
     ReserveSyncpoint(VBlank1SyncpointId, true);
 
-    for (u32 syncpointId : channel_syncpoints) {
-        if (syncpointId) {
-            ReserveSyncpoint(syncpointId, false);
+    for (u32 syncpoint_id : channel_syncpoints) {
+        if (syncpoint_id) {
+            ReserveSyncpoint(syncpoint_id, false);
         }
     }
 }
 
 SyncpointManager::~SyncpointManager() = default;
 
-u32 SyncpointManager::ReserveSyncpoint(u32 id, bool clientManaged) {
+u32 SyncpointManager::ReserveSyncpoint(u32 id, bool client_managed) {
     if (syncpoints.at(id).reserved) {
         ASSERT_MSG(false, "Requested syncpoint is in use");
         return 0;
     }
 
     syncpoints.at(id).reserved = true;
-    syncpoints.at(id).interfaceManaged = clientManaged;
+    syncpoints.at(id).interface_managed = client_managed;
 
     return id;
 }
@@ -49,9 +49,9 @@ u32 SyncpointManager::FindFreeSyncpoint() {
     return 0;
 }
 
-u32 SyncpointManager::AllocateSyncpoint(bool clientManaged) {
+u32 SyncpointManager::AllocateSyncpoint(bool client_managed) {
     std::lock_guard lock(reservation_lock);
-    return ReserveSyncpoint(FindFreeSyncpoint(), clientManaged);
+    return ReserveSyncpoint(FindFreeSyncpoint(), client_managed);
 }
 
 void SyncpointManager::FreeSyncpoint(u32 id) {
@@ -64,7 +64,7 @@ bool SyncpointManager::IsSyncpointAllocated(u32 id) {
     return (id <= SyncpointCount) && syncpoints[id].reserved;
 }
 
-bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) {
+bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) const {
     const SyncpointInfo& syncpoint{syncpoints.at(id)};
 
     if (!syncpoint.reserved) {
@@ -74,10 +74,10 @@ bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) {
 
     // If the interface manages counters then we don't keep track of the maximum value as it handles
     // sanity checking the values then
-    if (syncpoint.interfaceManaged) {
-        return static_cast<s32>(syncpoint.counterMin - threshold) >= 0;
+    if (syncpoint.interface_managed) {
+        return static_cast<s32>(syncpoint.counter_min - threshold) >= 0;
     } else {
-        return (syncpoint.counterMax - threshold) >= (syncpoint.counterMin - threshold);
+        return (syncpoint.counter_max - threshold) >= (syncpoint.counter_min - threshold);
     }
 }
 
@@ -87,7 +87,7 @@ u32 SyncpointManager::IncrementSyncpointMaxExt(u32 id, u32 amount) {
         return 0;
     }
 
-    return syncpoints.at(id).counterMax += amount;
+    return syncpoints.at(id).counter_max += amount;
 }
 
 u32 SyncpointManager::ReadSyncpointMinValue(u32 id) {
@@ -96,7 +96,7 @@ u32 SyncpointManager::ReadSyncpointMinValue(u32 id) {
         return 0;
     }
 
-    return syncpoints.at(id).counterMin;
+    return syncpoints.at(id).counter_min;
 }
 
 u32 SyncpointManager::UpdateMin(u32 id) {
@@ -105,8 +105,8 @@ u32 SyncpointManager::UpdateMin(u32 id) {
         return 0;
     }
 
-    syncpoints.at(id).counterMin = host1x.GetSyncpointManager().GetHostSyncpointValue(id);
-    return syncpoints.at(id).counterMin;
+    syncpoints.at(id).counter_min = host1x.GetSyncpointManager().GetHostSyncpointValue(id);
+    return syncpoints.at(id).counter_min;
 }
 
 NvFence SyncpointManager::GetSyncpointFence(u32 id) {
@@ -115,7 +115,7 @@ NvFence SyncpointManager::GetSyncpointFence(u32 id) {
         return NvFence{};
     }
 
-    return {.id = static_cast<s32>(id), .value = syncpoints.at(id).counterMax};
+    return {.id = static_cast<s32>(id), .value = syncpoints.at(id).counter_max};
 }
 
 } // namespace Service::Nvidia::NvCore
diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.h b/src/core/hle/service/nvdrv/core/syncpoint_manager.h
index 6b71cd33d..b76ef9032 100644
--- a/src/core/hle/service/nvdrv/core/syncpoint_manager.h
+++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.h
@@ -11,13 +11,9 @@
 #include "common/common_types.h"
 #include "core/hle/service/nvdrv/nvdata.h"
 
-namespace Tegra {
-
-namespace Host1x {
+namespace Tegra::Host1x {
 class Host1x;
-} // namespace Host1x
-
-} // namespace Tegra
+} // namespace Tegra::Host1x
 
 namespace Service::Nvidia::NvCore {
 
@@ -54,15 +50,15 @@ public:
      * @brief Finds a free syncpoint and reserves it
      * @return The ID of the reserved syncpoint
      */
-    u32 AllocateSyncpoint(bool clientManaged);
+    u32 AllocateSyncpoint(bool client_managed);
 
     /**
      * @url
      * https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/8f74a72394efb871cb3f886a3de2998cd7ff2990/drivers/gpu/host1x/syncpt.c#L259
      */
-    bool HasSyncpointExpired(u32 id, u32 threshold);
+    bool HasSyncpointExpired(u32 id, u32 threshold) const;
 
-    bool IsFenceSignalled(NvFence fence) {
+    bool IsFenceSignalled(NvFence fence) const {
         return HasSyncpointExpired(fence.id, fence.value);
     }
 
@@ -107,7 +103,7 @@ private:
     /**
      * @note reservation_lock should be locked when calling this
      */
-    u32 ReserveSyncpoint(u32 id, bool clientManaged);
+    u32 ReserveSyncpoint(u32 id, bool client_managed);
 
     /**
      * @return The ID of the first free syncpoint
@@ -115,15 +111,15 @@ private:
     u32 FindFreeSyncpoint();
 
     struct SyncpointInfo {
-        std::atomic<u32> counterMin; //!< The least value the syncpoint can be (The value it was
-                                     //!< when it was last synchronized with host1x)
-        std::atomic<u32> counterMax; //!< The maximum value the syncpoint can reach according to the
-                                     //!< current usage
-        bool interfaceManaged; //!< If the syncpoint is managed by a host1x client interface, a
-                               //!< client interface is a HW block that can handle host1x
-                               //!< transactions on behalf of a host1x client (Which would otherwise
-                               //!< need to be manually synced using PIO which is synchronous and
-                               //!< requires direct cooperation of the CPU)
+        std::atomic<u32> counter_min; //!< The least value the syncpoint can be (The value it was
+                                      //!< when it was last synchronized with host1x)
+        std::atomic<u32> counter_max; //!< The maximum value the syncpoint can reach according to
+                                      //!< the current usage
+        bool interface_managed; //!< If the syncpoint is managed by a host1x client interface, a
+                                //!< client interface is a HW block that can handle host1x
+                                //!< transactions on behalf of a host1x client (Which would
+                                //!< otherwise need to be manually synced using PIO which is
+                                //!< synchronous and requires direct cooperation of the CPU)
         bool reserved; //!< If the syncpoint is reserved or not, not to be confused with a reserved
                        //!< value
     };
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 192503ffc..6411dbf43 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -106,7 +106,7 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
             return NvResult::BadValue;
         }
 
-        if (!(params.big_page_size & VM::SUPPORTED_BIG_PAGE_SIZES)) {
+        if ((params.big_page_size & VM::SUPPORTED_BIG_PAGE_SIZES) == 0) {
             LOG_ERROR(Service_NVDRV, "Unsupported big page size: 0x{:X}!", params.big_page_size);
             return NvResult::BadValue;
         }
@@ -124,12 +124,13 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
         vm.va_range_end = params.va_range_end;
     }
 
-    const u64 start_pages{vm.va_range_start >> VM::PAGE_SIZE_BITS};
-    const u64 end_pages{vm.va_range_split >> VM::PAGE_SIZE_BITS};
+    const auto start_pages{static_cast<u32>(vm.va_range_start >> VM::PAGE_SIZE_BITS)};
+    const auto end_pages{static_cast<u32>(vm.va_range_split >> VM::PAGE_SIZE_BITS)};
     vm.small_page_allocator = std::make_shared<VM::Allocator>(start_pages, end_pages);
 
-    const u64 start_big_pages{vm.va_range_split >> vm.big_page_size_bits};
-    const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits};
+    const auto start_big_pages{static_cast<u32>(vm.va_range_split >> vm.big_page_size_bits)};
+    const auto end_big_pages{
+        static_cast<u32>((vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits)};
     vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages);
 
     gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, vm.big_page_size_bits,
@@ -210,10 +211,11 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
 
     // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
     // Only FreeSpace can unmap them fully
-    if (mapping->sparse_alloc)
+    if (mapping->sparse_alloc) {
         gmmu->MapSparse(offset, mapping->size, mapping->big_page);
-    else
+    } else {
         gmmu->Unmap(offset, mapping->size);
+    }
 
     mapping_map.erase(offset);
 }
@@ -256,7 +258,7 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
         allocator.Free(static_cast<u32>(params.offset >> page_size_bits),
                        static_cast<u32>(allocation.size >> page_size_bits));
         allocation_map.erase(params.offset);
-    } catch ([[maybe_unused]] const std::out_of_range& e) {
+    } catch (const std::out_of_range&) {
         return NvResult::BadValue;
     }
 
@@ -351,7 +353,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
             gmmu->Map(gpu_address, cpu_address, params.mapping_size, mapping->big_page);
 
             return NvResult::Success;
-        } catch ([[maybe_unused]] const std::out_of_range& e) {
+        } catch (const std::out_of_range&) {
             LOG_WARNING(Service_NVDRV, "Cannot remap an unmapped GPU address space region: 0x{:X}",
                         params.offset);
             return NvResult::BadValue;
@@ -367,11 +369,11 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
     u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};
 
     bool big_page{[&]() {
-        if (Common::IsAligned(handle->align, vm.big_page_size))
+        if (Common::IsAligned(handle->align, vm.big_page_size)) {
             return true;
-        else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE))
+        } else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE)) {
             return false;
-        else {
+        } else {
             ASSERT(false);
             return false;
         }
@@ -450,7 +452,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
         }
 
         mapping_map.erase(params.offset);
-    } catch ([[maybe_unused]] const std::out_of_range& e) {
+    } catch (const std::out_of_range&) {
         LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset);
     }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index 4aa738b41..0b56d7070 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -79,7 +79,7 @@ private:
         // Used for waiting on a syncpoint & canceling it.
         Tegra::Host1x::SyncpointManager::ActionHandle wait_handle{};
 
-        bool IsBeingUsed() {
+        bool IsBeingUsed() const {
             const auto current_status = status.load(std::memory_order_acquire);
             return current_status == EventState::Waiting ||
                    current_status == EventState::Cancelling ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 32e45540d..45a759fa8 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -184,7 +184,7 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8
                 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
                 params.unk3);
 
-    if (channel_state->initiated) {
+    if (channel_state->initialized) {
         LOG_CRITICAL(Service_NVDRV, "Already allocated!");
         return NvResult::AlreadyAllocated;
     }
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index fed537039..1703f9cc3 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -5,13 +5,12 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
 #include "video_core/renderer_base.h"
 
 namespace Service::Nvidia::Devices {
 
-u32 nvhost_nvdec::next_id{};
-
 nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core_)
     : nvhost_nvdec_common{system_, core_, NvCore::ChannelType::NvDec} {}
 nvhost_nvdec::~nvhost_nvdec() = default;
@@ -22,8 +21,9 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>&
     case 0x0:
         switch (command.cmd) {
         case 0x1: {
-            if (!fd_to_id.contains(fd)) {
-                fd_to_id[fd] = next_id++;
+            auto& host1x_file = core.Host1xDeviceFile();
+            if (!host1x_file.fd_to_id.contains(fd)) {
+                host1x_file.fd_to_id[fd] = host1x_file.nvdec_next_id++;
             }
             return Submit(fd, input, output);
         }
@@ -74,8 +74,9 @@ void nvhost_nvdec::OnOpen(DeviceFD fd) {
 
 void nvhost_nvdec::OnClose(DeviceFD fd) {
     LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
-    const auto iter = fd_to_id.find(fd);
-    if (iter != fd_to_id.end()) {
+    auto& host1x_file = core.Host1xDeviceFile();
+    const auto iter = host1x_file.fd_to_id.find(fd);
+    if (iter != host1x_file.fd_to_id.end()) {
         system.GPU().ClearCdmaInstance(iter->second);
     }
     system.AudioCore().SetNVDECActive(false);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 3261ce1d4..c1b4e53e8 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -22,9 +22,6 @@ public:
 
     void OnOpen(DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
-
-private:
-    static u32 next_id;
 };
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 2ec1ad3e9..99eede702 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -46,13 +46,11 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
 }
 } // Anonymous namespace
 
-std::unordered_map<DeviceFD, u32> nvhost_nvdec_common::fd_to_id{};
-std::deque<u32> nvhost_nvdec_common::syncpts_accumulated{};
-
 nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_,
                                          NvCore::ChannelType channel_type_)
     : nvdevice{system_}, core{core_}, syncpoint_manager{core.GetSyncpointManager()},
       nvmap{core.GetNvMapFile()}, channel_type{channel_type_} {
+    auto& syncpts_accumulated = core.Host1xDeviceFile().syncpts_accumulated;
     if (syncpts_accumulated.empty()) {
         channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false);
     } else {
@@ -60,8 +58,9 @@ nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Containe
         syncpts_accumulated.pop_front();
     }
 }
+
 nvhost_nvdec_common::~nvhost_nvdec_common() {
-    syncpts_accumulated.push_back(channel_syncpoint);
+    core.Host1xDeviceFile().syncpts_accumulated.push_back(channel_syncpoint);
 }
 
 NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
@@ -108,7 +107,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector<u8>& input,
         Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
         system.Memory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
                                   cmdlist.size() * sizeof(u32));
-        gpu.PushCommandBuffer(fd_to_id[fd], cmdlist);
+        gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
     }
     std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
     // Some games expect command_buffers to be written back
@@ -186,8 +185,4 @@ Kernel::KEvent* nvhost_nvdec_common::QueryEvent(u32 event_id) {
     return nullptr;
 }
 
-void nvhost_nvdec_common::Reset() {
-    fd_to_id.clear();
-}
-
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
index 93990bb9b..fe76100c8 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -25,8 +25,6 @@ public:
                                  NvCore::ChannelType channel_type);
     ~nvhost_nvdec_common() override;
 
-    static void Reset();
-
 protected:
     struct IoctlSetNvmapFD {
         s32_le nvmap_fd{};
@@ -119,7 +117,6 @@ protected:
 
     Kernel::KEvent* QueryEvent(u32 event_id) override;
 
-    static std::unordered_map<DeviceFD, u32> fd_to_id;
     u32 channel_syncpoint;
     s32_le nvmap_fd{};
     u32_le submit_timeout{};
@@ -128,8 +125,6 @@ protected:
     NvCore::NvMap& nvmap;
     NvCore::ChannelType channel_type;
     std::array<u32, MaxSyncPoints> device_syncpoints{};
-
-    static std::deque<u32> syncpts_accumulated;
 };
 }; // namespace Devices
 } // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index 2e4ff988c..73f97136e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -4,13 +4,12 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/devices/nvhost_vic.h"
 #include "video_core/renderer_base.h"
 
 namespace Service::Nvidia::Devices {
 
-u32 nvhost_vic::next_id{};
-
 nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core_)
     : nvhost_nvdec_common{system_, core_, NvCore::ChannelType::VIC} {}
 
@@ -21,11 +20,13 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& i
     switch (command.group) {
     case 0x0:
         switch (command.cmd) {
-        case 0x1:
-            if (!fd_to_id.contains(fd)) {
-                fd_to_id[fd] = next_id++;
+        case 0x1: {
+            auto& host1x_file = core.Host1xDeviceFile();
+            if (!host1x_file.fd_to_id.contains(fd)) {
+                host1x_file.fd_to_id[fd] = host1x_file.vic_next_id++;
             }
             return Submit(fd, input, output);
+        }
         case 0x2:
             return GetSyncpoint(input, output);
         case 0x3:
@@ -69,8 +70,9 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& i
 void nvhost_vic::OnOpen(DeviceFD fd) {}
 
 void nvhost_vic::OnClose(DeviceFD fd) {
-    const auto iter = fd_to_id.find(fd);
-    if (iter != fd_to_id.end()) {
+    auto& host1x_file = core.Host1xDeviceFile();
+    const auto iter = host1x_file.fd_to_id.find(fd);
+    if (iter != host1x_file.fd_to_id.end()) {
         system.GPU().ClearCdmaInstance(iter->second);
     }
 }
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index 59e23b41e..f164caafb 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -21,8 +21,5 @@ public:
 
     void OnOpen(DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
-
-private:
-    static u32 next_id;
 };
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index 52e1d7cff..e9bfd0358 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -23,8 +23,8 @@ public:
     explicit nvmap(Core::System& system_, NvCore::Container& container);
     ~nvmap() override;
 
-    nvmap(nvmap const&) = delete;
-    nvmap& operator=(nvmap const&) = delete;
+    nvmap(const nvmap&) = delete;
+    nvmap& operator=(const nvmap&) = delete;
 
     NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
                     std::vector<u8>& output) override;
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 7929443d2..5e7b7468f 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -101,9 +101,7 @@ Module::Module(Core::System& system)
     };
 }
 
-Module::~Module() {
-    Devices::nvhost_nvdec_common::Reset();
-}
+Module::~Module() = default;
 
 NvResult Module::VerifyFD(DeviceFD fd) const {
     if (fd < 0) {
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index a2aeb80b4..146d046a9 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -46,7 +46,7 @@ class Module;
 
 class EventInterface {
 public:
-    EventInterface(Module& module_);
+    explicit EventInterface(Module& module_);
     ~EventInterface();
 
     Kernel::KEvent* CreateEvent(std::string name);
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
index b04922ac0..cdecc3a91 100644
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@@ -14,10 +14,7 @@
 
 namespace Tegra::Control {
 
-ChannelState::ChannelState(s32 bind_id_) {
-    bind_id = bind_id_;
-    initiated = false;
-}
+ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {}
 
 void ChannelState::Init(Core::System& system, GPU& gpu) {
     ASSERT(memory_manager);
@@ -27,7 +24,7 @@ void ChannelState::Init(Core::System& system, GPU& gpu) {
     kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
     maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
     kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
-    initiated = true;
+    initialized = true;
 }
 
 void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h
index 305b21cba..3a7b9872c 100644
--- a/src/video_core/control/channel_state.h
+++ b/src/video_core/control/channel_state.h
@@ -34,7 +34,7 @@ class DmaPusher;
 namespace Control {
 
 struct ChannelState {
-    ChannelState(s32 bind_id);
+    explicit ChannelState(s32 bind_id);
     ChannelState(const ChannelState& state) = delete;
     ChannelState& operator=(const ChannelState&) = delete;
     ChannelState(ChannelState&& other) noexcept = default;
@@ -60,7 +60,7 @@ struct ChannelState {
 
     std::unique_ptr<DmaPusher> dma_pusher;
 
-    bool initiated{};
+    bool initialized{};
 };
 
 } // namespace Control
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
index 5246192a8..584a0c26c 100644
--- a/src/video_core/control/channel_state_cache.h
+++ b/src/video_core/control/channel_state_cache.h
@@ -32,7 +32,7 @@ namespace VideoCommon {
 class ChannelInfo {
 public:
     ChannelInfo() = delete;
-    ChannelInfo(Tegra::Control::ChannelState& state);
+    explicit ChannelInfo(Tegra::Control::ChannelState& state);
     ChannelInfo(const ChannelInfo& state) = delete;
     ChannelInfo& operator=(const ChannelInfo&) = delete;
     ChannelInfo(ChannelInfo&& other) = default;
diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp
index 733042690..f7cbe204e 100644
--- a/src/video_core/control/scheduler.cpp
+++ b/src/video_core/control/scheduler.cpp
@@ -3,6 +3,7 @@
 
 #include <memory>
 
+#include "common/assert.h"
 #include "video_core/control/channel_state.h"
 #include "video_core/control/scheduler.h"
 #include "video_core/gpu.h"
@@ -13,8 +14,9 @@ Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
 Scheduler::~Scheduler() = default;
 
 void Scheduler::Push(s32 channel, CommandList&& entries) {
-    std::unique_lock<std::mutex> lk(scheduling_guard);
+    std::unique_lock lk(scheduling_guard);
     auto it = channels.find(channel);
+    ASSERT(it != channels.end());
     auto channel_state = it->second;
     gpu.BindChannel(channel_state->bind_id);
     channel_state->dma_pusher->Push(std::move(entries));
@@ -23,7 +25,7 @@ void Scheduler::Push(s32 channel, CommandList&& entries) {
 
 void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
     s32 channel = new_channel->bind_id;
-    std::unique_lock<std::mutex> lk(scheduling_guard);
-    channels.emplace(channel, new_channel);
+    std::unique_lock lk(scheduling_guard);
+    channels.emplace(channel, std::move(new_channel));
 }
 
diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h
index 305a01e0a..44addf61c 100644
--- a/src/video_core/control/scheduler.h
+++ b/src/video_core/control/scheduler.h
@@ -19,7 +19,7 @@ struct ChannelState;
 
 class Scheduler {
 public:
-    Scheduler(GPU& gpu_);
+    explicit Scheduler(GPU& gpu_);
     ~Scheduler();
 
     void Push(s32 channel, CommandList&& entries);
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 9c5d567a6..bc48320ce 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -195,7 +195,7 @@ public:
             BitField<24, 2, u32> num_dst_components_minus_one;
         };
 
-        Swizzle GetComponent(size_t i) {
+        Swizzle GetComponent(size_t i) const {
             const u32 raw = dst_components_raw;
             return static_cast<Swizzle>((raw >> (i * 3)) & 0x7);
         }
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index d7a3dd96b..28b38273e 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -355,7 +355,7 @@ struct GPU::Impl {
 
     std::condition_variable sync_cv;
 
-    std::list<std::function<void(void)>> sync_requests;
+    std::list<std::function<void()>> sync_requests;
     std::atomic<u64> current_sync_fence{};
     u64 last_sync_fence{};
     std::mutex sync_request_mutex;
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h
index 7ecf853d9..57082ae54 100644
--- a/src/video_core/host1x/host1x.h
+++ b/src/video_core/host1x/host1x.h
@@ -19,7 +19,7 @@ namespace Host1x {
 
 class Host1x {
 public:
-    Host1x(Core::System& system);
+    explicit Host1x(Core::System& system);
 
     SyncpointManager& GetSyncpointManager() {
         return syncpoint_manager;
diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp
index 4471bacae..326e8355a 100644
--- a/src/video_core/host1x/syncpoint_manager.cpp
+++ b/src/video_core/host1x/syncpoint_manager.cpp
@@ -12,13 +12,13 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 
 SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
     std::atomic<u32>& syncpoint, std::list<RegisteredAction>& action_storage, u32 expected_value,
-    std::function<void(void)>& action) {
+    std::function<void()>&& action) {
     if (syncpoint.load(std::memory_order_acquire) >= expected_value) {
         action();
         return {};
     }
 
-    std::unique_lock<std::mutex> lk(guard);
+    std::unique_lock lk(guard);
     if (syncpoint.load(std::memory_order_relaxed) >= expected_value) {
         action();
         return {};
@@ -30,12 +30,12 @@ SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
         }
         ++it;
     }
-    return action_storage.emplace(it, expected_value, action);
+    return action_storage.emplace(it, expected_value, std::move(action));
 }
 
 void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage,
                                         ActionHandle& handle) {
-    std::unique_lock<std::mutex> lk(guard);
+    std::unique_lock lk(guard);
     action_storage.erase(handle);
 }
 
@@ -68,7 +68,7 @@ void SyncpointManager::Increment(std::atomic<u32>& syncpoint, std::condition_var
                                  std::list<RegisteredAction>& action_storage) {
     auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1};
 
-    std::unique_lock<std::mutex> lk(guard);
+    std::unique_lock lk(guard);
     auto it = action_storage.begin();
     while (it != action_storage.end()) {
         if (it->expected_value > new_value) {
@@ -87,7 +87,7 @@ void SyncpointManager::Wait(std::atomic<u32>& syncpoint, std::condition_variable
         return;
     }
 
-    std::unique_lock<std::mutex> lk(guard);
+    std::unique_lock lk(guard);
     wait_cv.wait(lk, pred);
 }
 
diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h
index 72220a09a..50a264e23 100644
--- a/src/video_core/host1x/syncpoint_manager.h
+++ b/src/video_core/host1x/syncpoint_manager.h
@@ -18,34 +18,34 @@ namespace Host1x {
 
 class SyncpointManager {
 public:
-    u32 GetGuestSyncpointValue(u32 id) {
+    u32 GetGuestSyncpointValue(u32 id) const {
         return syncpoints_guest[id].load(std::memory_order_acquire);
     }
 
-    u32 GetHostSyncpointValue(u32 id) {
+    u32 GetHostSyncpointValue(u32 id) const {
         return syncpoints_host[id].load(std::memory_order_acquire);
     }
 
     struct RegisteredAction {
-        RegisteredAction(u32 expected_value_, std::function<void(void)>& action_)
-            : expected_value{expected_value_}, action{action_} {}
+        explicit RegisteredAction(u32 expected_value_, std::function<void()>&& action_)
+            : expected_value{expected_value_}, action{std::move(action_)} {}
         u32 expected_value;
-        std::function<void(void)> action;
+        std::function<void()> action;
     };
     using ActionHandle = std::list<RegisteredAction>::iterator;
 
     template <typename Func>
     ActionHandle RegisterGuestAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
-        std::function<void(void)> func(action);
+        std::function<void()> func(std::forward<Func>(action));
         return RegisterAction(syncpoints_guest[syncpoint_id], guest_action_storage[syncpoint_id],
-                              expected_value, func);
+                              expected_value, std::move(func));
     }
 
     template <typename Func>
     ActionHandle RegisterHostAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
-        std::function<void(void)> func(action);
+        std::function<void()> func(std::forward<Func>(action));
         return RegisterAction(syncpoints_host[syncpoint_id], host_action_storage[syncpoint_id],
-                              expected_value, func);
+                              expected_value, std::move(func));
     }
 
     void DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle);
@@ -60,11 +60,11 @@ public:
 
     void WaitHost(u32 syncpoint_id, u32 expected_value);
 
-    bool IsReadyGuest(u32 syncpoint_id, u32 expected_value) {
+    bool IsReadyGuest(u32 syncpoint_id, u32 expected_value) const {
         return syncpoints_guest[syncpoint_id].load(std::memory_order_acquire) >= expected_value;
     }
 
-    bool IsReadyHost(u32 syncpoint_id, u32 expected_value) {
+    bool IsReadyHost(u32 syncpoint_id, u32 expected_value) const {
         return syncpoints_host[syncpoint_id].load(std::memory_order_acquire) >= expected_value;
     }
 
@@ -74,7 +74,7 @@ private:
 
     ActionHandle RegisterAction(std::atomic<u32>& syncpoint,
                                 std::list<RegisteredAction>& action_storage, u32 expected_value,
-                                std::function<void(void)>& action);
+                                std::function<void()>&& action);
 
     void DeregisterAction(std::list<RegisteredAction>& action_storage, ActionHandle& handle);
 
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index ae4fd98df..f992e29f3 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -126,7 +126,7 @@ private:
     void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
 
     template <bool is_big_page>
-    [[nodiscard]] inline std::size_t PageEntryIndex(GPUVAddr gpu_addr) const {
+    [[nodiscard]] std::size_t PageEntryIndex(GPUVAddr gpu_addr) const {
         if constexpr (is_big_page) {
             return (gpu_addr >> big_page_bits) & big_page_table_mask;
         } else {
-- 
cgit v1.2.3