From 3cbe352c18f69596d91c4862382d61a3d6515140 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 1 Nov 2021 18:53:32 +0100 Subject: NVDRV: Refactor and add new NvMap. --- src/common/bit_field.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/common') diff --git a/src/common/bit_field.h b/src/common/bit_field.h index 7e1df62b1..368b7b98c 100644 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h @@ -127,11 +127,14 @@ public: } } - // This constructor and assignment operator might be considered ambiguous: - // Would they initialize the storage or just the bitfield? - // Hence, delete them. Use the Assign method to set bitfield values! - BitField(T val) = delete; - BitField& operator=(T val) = delete; + BitField(T val) { + Assign(val); + } + + BitField& operator=(T val) { + Assign(val); + return *this; + } constexpr BitField() noexcept = default; -- cgit v1.2.3 From b617874724c461cba270a00c0f8e67fc4a6d553a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 10 Nov 2021 17:37:17 +0100 Subject: Common: implement MultiLevelPageTable. --- src/common/CMakeLists.txt | 2 + src/common/multi_level_page_table.cpp | 7 +++ src/common/multi_level_page_table.h | 79 +++++++++++++++++++++++++++++++++ src/common/multi_level_page_table.inc | 83 +++++++++++++++++++++++++++++++++++ 4 files changed, 171 insertions(+) create mode 100644 src/common/multi_level_page_table.cpp create mode 100644 src/common/multi_level_page_table.h create mode 100644 src/common/multi_level_page_table.inc (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 3447fabd8..2db414819 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -81,6 +81,8 @@ add_library(common STATIC microprofile.cpp microprofile.h microprofileui.h + multi_level_page_table.cpp + multi_level_page_table.h nvidia_flags.cpp nvidia_flags.h page_table.cpp diff --git a/src/common/multi_level_page_table.cpp b/src/common/multi_level_page_table.cpp new file mode 100644 index 000000000..561785ca7 --- /dev/null +++ b/src/common/multi_level_page_table.cpp @@ -0,0 +1,7 @@ +#include "common/multi_level_page_table.inc" + +namespace Common { +template class Common::MultiLevelPageTable; +template class Common::MultiLevelPageTable; +template class Common::MultiLevelPageTable; +} // namespace Common diff --git a/src/common/multi_level_page_table.h b/src/common/multi_level_page_table.h new file mode 100644 index 000000000..dde1cc962 --- /dev/null +++ b/src/common/multi_level_page_table.h @@ -0,0 +1,79 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" + +namespace Common { + +template +class MultiLevelPageTable final { +public: + constexpr MultiLevelPageTable() = default; + explicit MultiLevelPageTable(std::size_t address_space_bits, std::size_t first_level_bits, + std::size_t page_bits); + + ~MultiLevelPageTable() noexcept; + + MultiLevelPageTable(const MultiLevelPageTable&) = delete; + MultiLevelPageTable& operator=(const MultiLevelPageTable&) = delete; + + MultiLevelPageTable(MultiLevelPageTable&& other) noexcept + : address_space_bits{std::exchange(other.address_space_bits, 0)}, + first_level_bits{std::exchange(other.first_level_bits, 0)}, page_bits{std::exchange( + other.page_bits, 0)}, + first_level_shift{std::exchange(other.first_level_shift, 0)}, + first_level_chunk_size{std::exchange(other.first_level_chunk_size, 0)}, + first_level_map{std::move(other.first_level_map)}, base_ptr{std::exchange(other.base_ptr, + nullptr)} {} + + MultiLevelPageTable& operator=(MultiLevelPageTable&& other) noexcept { + address_space_bits = std::exchange(other.address_space_bits, 0); + first_level_bits = std::exchange(other.first_level_bits, 0); + page_bits = std::exchange(other.page_bits, 0); + first_level_shift = std::exchange(other.first_level_shift, 0); + first_level_chunk_size = std::exchange(other.first_level_chunk_size, 0); + alloc_size = std::exchange(other.alloc_size, 0); + first_level_map = std::move(other.first_level_map); + base_ptr = std::exchange(other.base_ptr, nullptr); + return *this; + } + + void ReserveRange(u64 start, std::size_t size); + + [[nodiscard]] constexpr const BaseAddr& operator[](std::size_t index) const { + return base_ptr[index]; + } + + [[nodiscard]] constexpr BaseAddr& operator[](std::size_t index) { + return base_ptr[index]; + } + + [[nodiscard]] constexpr BaseAddr* data() { + return base_ptr; + } + + [[nodiscard]] constexpr const BaseAddr* data() const { + return base_ptr; + } + +private: + void AllocateLevel(u64 level); + + std::size_t address_space_bits{}; + std::size_t first_level_bits{}; + std::size_t page_bits{}; + std::size_t first_level_shift{}; + std::size_t first_level_chunk_size{}; + std::size_t alloc_size{}; + std::vector first_level_map{}; + BaseAddr* base_ptr{}; +}; + +} // namespace Common diff --git a/src/common/multi_level_page_table.inc b/src/common/multi_level_page_table.inc new file mode 100644 index 000000000..a75e61f9d --- /dev/null +++ b/src/common/multi_level_page_table.inc @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#ifdef _WIN32 +#include +#else +#include +#endif + +#include "common/assert.h" +#include "common/multi_level_page_table.h" + +namespace Common { + +template +MultiLevelPageTable::MultiLevelPageTable(std::size_t address_space_bits_, + std::size_t first_level_bits_, + std::size_t page_bits_) + : address_space_bits{address_space_bits_}, + first_level_bits{first_level_bits_}, page_bits{page_bits_} { + first_level_shift = address_space_bits - first_level_bits; + first_level_chunk_size = 1ULL << (first_level_shift - page_bits); + alloc_size = (1ULL << (address_space_bits - page_bits)) * sizeof(BaseAddr); + std::size_t first_level_size = 1ULL << first_level_bits; + first_level_map.resize(first_level_size, nullptr); +#ifdef _WIN32 + void* base{VirtualAlloc(nullptr, alloc_size, MEM_RESERVE, PAGE_READWRITE)}; +#else + void* base{mmap(nullptr, alloc_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)}; + + if (base == MAP_FAILED) { + base = nullptr; + } +#endif + + ASSERT(base); + base_ptr = reinterpret_cast(base); +} + +template +MultiLevelPageTable::~MultiLevelPageTable() noexcept { + if (!base_ptr) { + return; + } +#ifdef _WIN32 + ASSERT(VirtualFree(base_ptr, 0, MEM_RELEASE)); +#else + ASSERT(munmap(base_ptr, alloc_size) == 0); +#endif +} + +template +void MultiLevelPageTable::ReserveRange(u64 start, std::size_t size) { + const u64 new_start = start >> first_level_shift; + const u64 new_end = + (start + size + (first_level_chunk_size << page_bits) - 1) >> first_level_shift; + for (u64 i = new_start; i <= new_end; i++) { + if (!first_level_map[i]) { + AllocateLevel(i); + } + } +} + +template +void MultiLevelPageTable::AllocateLevel(u64 level) { + void* ptr = reinterpret_cast(base_ptr) + level * first_level_chunk_size; +#ifdef _WIN32 + void* base{VirtualAlloc(ptr, first_level_chunk_size, MEM_COMMIT, PAGE_READWRITE)}; +#else + void* base{mmap(ptr, first_level_chunk_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)}; + + if (base == MAP_FAILED) { + base = nullptr; + } +#endif + ASSERT(base); + + first_level_map[level] = base; +} + +} // namespace Common -- cgit v1.2.3 From cbaf3fb433a351f7d9509f17f88d4896ba66afd1 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 11 Nov 2021 21:24:40 +0100 Subject: VideoCore: Update MemoryManager --- src/common/multi_level_page_table.cpp | 1 + src/common/multi_level_page_table.inc | 7 +- src/video_core/memory_manager.cpp | 147 +++++++++++++--------------------- src/video_core/memory_manager.h | 98 +++++++---------------- 4 files changed, 86 insertions(+), 167 deletions(-) (limited to 'src/common') diff --git a/src/common/multi_level_page_table.cpp b/src/common/multi_level_page_table.cpp index 561785ca7..aed04d0b5 100644 --- a/src/common/multi_level_page_table.cpp +++ b/src/common/multi_level_page_table.cpp @@ -4,4 +4,5 @@ namespace Common { template class Common::MultiLevelPageTable; template class Common::MultiLevelPageTable; template class Common::MultiLevelPageTable; +template class Common::MultiLevelPageTable; } // namespace Common diff --git a/src/common/multi_level_page_table.inc b/src/common/multi_level_page_table.inc index a75e61f9d..7fbcb908a 100644 --- a/src/common/multi_level_page_table.inc +++ b/src/common/multi_level_page_table.inc @@ -20,7 +20,7 @@ MultiLevelPageTable::MultiLevelPageTable(std::size_t address_space_bit : address_space_bits{address_space_bits_}, first_level_bits{first_level_bits_}, page_bits{page_bits_} { first_level_shift = address_space_bits - first_level_bits; - first_level_chunk_size = 1ULL << (first_level_shift - page_bits); + first_level_chunk_size = (1ULL << (first_level_shift - page_bits)) * sizeof(BaseAddr); alloc_size = (1ULL << (address_space_bits - page_bits)) * sizeof(BaseAddr); std::size_t first_level_size = 1ULL << first_level_bits; first_level_map.resize(first_level_size, nullptr); @@ -53,8 +53,7 @@ MultiLevelPageTable::~MultiLevelPageTable() noexcept { template void MultiLevelPageTable::ReserveRange(u64 start, std::size_t size) { const u64 new_start = start >> first_level_shift; - const u64 new_end = - (start + size + (first_level_chunk_size << page_bits) - 1) >> first_level_shift; + const u64 new_end = (start + size) >> first_level_shift; for (u64 i = new_start; i <= new_end; i++) { if (!first_level_map[i]) { AllocateLevel(i); @@ -64,7 +63,7 @@ void MultiLevelPageTable::ReserveRange(u64 start, std::size_t size) { template void MultiLevelPageTable::AllocateLevel(u64 level) { - void* ptr = reinterpret_cast(base_ptr) + level * first_level_chunk_size; + void* ptr = reinterpret_cast(base_ptr) + level * first_level_chunk_size; #ifdef _WIN32 void* base{VirtualAlloc(ptr, first_level_chunk_size, MEM_COMMIT, PAGE_READWRITE)}; #else diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index a3efd365e..1e090279f 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -16,36 +16,63 @@ namespace Tegra { -MemoryManager::MemoryManager(Core::System& system_) - : system{system_}, page_table(page_table_size) {} +MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 page_bits_) + : system{system_}, address_space_bits{address_space_bits_}, page_bits{page_bits_}, entries{}, + page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits} { + address_space_size = 1ULL << address_space_bits; + allocate_start = address_space_bits > 32 ? 1ULL << 32 : 0; + page_size = 1ULL << page_bits; + page_mask = page_size - 1ULL; + const u64 page_table_bits = address_space_bits - cpu_page_bits; + const u64 page_table_size = 1ULL << page_table_bits; + page_table_mask = page_table_size - 1; + + entries.resize(page_table_size / 32, 0); +} MemoryManager::~MemoryManager() = default; -void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { - rasterizer = rasterizer_; +MemoryManager::EntryType MemoryManager::GetEntry(size_t position) const { + position = position >> page_bits; + const u64 entry_mask = entries[position / 32]; + const size_t sub_index = position % 32; + return static_cast((entry_mask >> (2 * sub_index)) & 0x03ULL); +} + +void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) { + position = position >> page_bits; + const u64 entry_mask = entries[position / 32]; + const size_t sub_index = position % 32; + entries[position / 32] = + (~(3ULL << sub_index * 2) & entry_mask) | (static_cast(entry) << sub_index * 2); } -GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { +template +GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, + size_t size) { u64 remaining_size{size}; + if constexpr (entry_type == EntryType::Mapped) { + page_table.ReserveRange(gpu_addr, size); + } for (u64 offset{}; offset < size; offset += page_size) { - if (remaining_size < page_size) { - SetPageEntry(gpu_addr + offset, page_entry + offset, remaining_size); - } else { - SetPageEntry(gpu_addr + offset, page_entry + offset); + const GPUVAddr current_gpu_addr = gpu_addr + offset; + SetEntry(current_gpu_addr, entry_type); + if constexpr (entry_type == EntryType::Mapped) { + const VAddr current_cpu_addr = cpu_addr + offset; + const auto index = PageEntryIndex(current_gpu_addr); + page_table[index] = static_cast(current_cpu_addr >> 12ULL); } remaining_size -= page_size; } return gpu_addr; } +void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { + rasterizer = rasterizer_; +} + GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) { - const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); - if (it != map_ranges.end() && it->first == gpu_addr) { - it->second = size; - } else { - map_ranges.insert(it, MapRange{gpu_addr, size}); - } - return UpdateRange(gpu_addr, cpu_addr, size); + return PageTableOp(gpu_addr, cpu_addr, size); } GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) { @@ -62,13 +89,6 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { if (size == 0) { return; } - const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); - if (it != map_ranges.end()) { - ASSERT(it->first == gpu_addr); - map_ranges.erase(it); - } else { - ASSERT_MSG(false, "Unmapping non-existent GPU address=0x{:x}", gpu_addr); - } const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); for (const auto& [map_addr, map_size] : submapped_ranges) { @@ -79,63 +99,23 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { rasterizer->UnmapMemory(*cpu_addr, map_size); } - UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); + PageTableOp(gpu_addr, 0, size); } std::optional MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) { for (u64 offset{}; offset < size; offset += page_size) { - if (!GetPageEntry(gpu_addr + offset).IsUnmapped()) { + if (GetEntry(gpu_addr + offset) != EntryType::Free) { return std::nullopt; } } - return UpdateRange(gpu_addr, PageEntry::State::Allocated, size); + return PageTableOp(gpu_addr, 0, size); } GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) { return *AllocateFixed(*FindFreeRange(size, align), size); } -void MemoryManager::TryLockPage(PageEntry page_entry, std::size_t size) { - if (!page_entry.IsValid()) { - return; - } - - ASSERT(system.CurrentProcess() - ->PageTable() - .LockForDeviceAddressSpace(page_entry.ToAddress(), size) - .IsSuccess()); -} - -void MemoryManager::TryUnlockPage(PageEntry page_entry, std::size_t size) { - if (!page_entry.IsValid()) { - return; - } - - ASSERT(system.CurrentProcess() - ->PageTable() - .UnlockForDeviceAddressSpace(page_entry.ToAddress(), size) - .IsSuccess()); -} - -PageEntry MemoryManager::GetPageEntry(GPUVAddr gpu_addr) const { - return page_table[PageEntryIndex(gpu_addr)]; -} - -void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { - // TODO(bunnei): We should lock/unlock device regions. This currently causes issues due to - // improper tracking, but should be fixed in the future. - - //// Unlock the old page - // TryUnlockPage(page_table[PageEntryIndex(gpu_addr)], size); - - //// Lock the new page - // TryLockPage(page_entry, size); - auto& current_page = page_table[PageEntryIndex(gpu_addr)]; - - current_page = page_entry; -} - std::optional MemoryManager::FindFreeRange(std::size_t size, std::size_t align, bool start_32bit_address) const { if (!align) { @@ -145,9 +125,9 @@ std::optional MemoryManager::FindFreeRange(std::size_t size, std::size } u64 available_size{}; - GPUVAddr gpu_addr{start_32bit_address ? address_space_start_low : address_space_start}; + GPUVAddr gpu_addr{allocate_start}; while (gpu_addr + available_size < address_space_size) { - if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) { + if (GetEntry(gpu_addr + available_size) == EntryType::Free) { available_size += page_size; if (available_size >= size) { @@ -168,15 +148,12 @@ std::optional MemoryManager::FindFreeRange(std::size_t size, std::size } std::optional MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { - if (gpu_addr == 0) { - return std::nullopt; - } - const auto page_entry{GetPageEntry(gpu_addr)}; - if (!page_entry.IsValid()) { + if (GetEntry(gpu_addr) != EntryType::Mapped) { return std::nullopt; } - return page_entry.ToAddress() + (gpu_addr & page_mask); + const VAddr cpu_addr_base = static_cast(page_table[PageEntryIndex(gpu_addr)]) << 12ULL; + return cpu_addr_base + (gpu_addr & page_mask); } std::optional MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { @@ -227,10 +204,6 @@ template void MemoryManager::Write(GPUVAddr addr, u32 data); template void MemoryManager::Write(GPUVAddr addr, u64 data); u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) { - if (!GetPageEntry(gpu_addr).IsValid()) { - return {}; - } - const auto address{GpuToCpuAddress(gpu_addr)}; if (!address) { return {}; @@ -240,10 +213,6 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) { } const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { - if (!GetPageEntry(gpu_addr).IsValid()) { - return {}; - } - const auto address{GpuToCpuAddress(gpu_addr)}; if (!address) { return {}; @@ -252,12 +221,6 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { return system.Memory().GetPointer(*address); } -size_t MemoryManager::BytesToMapEnd(GPUVAddr gpu_addr) const noexcept { - auto it = std::ranges::upper_bound(map_ranges, gpu_addr, {}, &MapRange::first); - --it; - return it->second - (gpu_addr - it->first); -} - void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, bool is_safe) const { std::size_t remaining_size{size}; @@ -268,7 +231,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: const std::size_t copy_amount{ std::min(static_cast(page_size) - page_offset, remaining_size)}; const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; - if (page_addr && *page_addr != 0) { + if (page_addr) { const auto src_addr{*page_addr + page_offset}; if (is_safe) { // Flush must happen on the rasterizer interface, such that memory is always @@ -307,7 +270,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe const std::size_t copy_amount{ std::min(static_cast(page_size) - page_offset, remaining_size)}; const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; - if (page_addr && *page_addr != 0) { + if (page_addr) { const auto dest_addr{*page_addr + page_offset}; if (is_safe) { @@ -392,7 +355,7 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons size_t page_index{gpu_addr >> page_bits}; const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; while (page_index < page_last) { - if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) { + if (GetEntry(page_index << page_bits) == EntryType::Free) { return false; } ++page_index; @@ -408,7 +371,7 @@ std::vector> MemoryManager::GetSubmappedRange( size_t page_offset{gpu_addr & page_mask}; std::optional> last_segment{}; std::optional old_page_addr{}; - const auto extend_size = [&last_segment, &page_index, &page_offset](std::size_t bytes) { + const auto extend_size = [this, &last_segment, &page_index, &page_offset](std::size_t bytes) { if (!last_segment) { const GPUVAddr new_base_addr = (page_index << page_bits) + page_offset; last_segment = {new_base_addr, bytes}; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 74f9ce175..0a763fd19 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -8,6 +8,7 @@ #include #include "common/common_types.h" +#include "common/multi_level_page_table.h" namespace VideoCore { class RasterizerInterface; @@ -19,55 +20,10 @@ class System; namespace Tegra { -class PageEntry final { -public: - enum class State : u32 { - Unmapped = static_cast(-1), - Allocated = static_cast(-2), - }; - - constexpr PageEntry() = default; - constexpr PageEntry(State state_) : state{state_} {} - constexpr PageEntry(VAddr addr) : state{static_cast(addr >> ShiftBits)} {} - - [[nodiscard]] constexpr bool IsUnmapped() const { - return state == State::Unmapped; - } - - [[nodiscard]] constexpr bool IsAllocated() const { - return state == State::Allocated; - } - - [[nodiscard]] constexpr bool IsValid() const { - return !IsUnmapped() && !IsAllocated(); - } - - [[nodiscard]] constexpr VAddr ToAddress() const { - if (!IsValid()) { - return {}; - } - - return static_cast(state) << ShiftBits; - } - - [[nodiscard]] constexpr PageEntry operator+(u64 offset) const { - // If this is a reserved value, offsets do not apply - if (!IsValid()) { - return *this; - } - return PageEntry{(static_cast(state) << ShiftBits) + offset}; - } - -private: - static constexpr std::size_t ShiftBits{12}; - - State state{State::Unmapped}; -}; -static_assert(sizeof(PageEntry) == 4, "PageEntry is too large"); - class MemoryManager final { public: - explicit MemoryManager(Core::System& system_); + explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40, + u64 page_bits_ = 16); ~MemoryManager(); /// Binds a renderer to the memory manager. @@ -86,9 +42,6 @@ public: [[nodiscard]] u8* GetPointer(GPUVAddr addr); [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const; - /// Returns the number of bytes until the end of the memory map containing the given GPU address - [[nodiscard]] size_t BytesToMapEnd(GPUVAddr gpu_addr) const noexcept; - /** * ReadBlock and WriteBlock are full read and write operations over virtual * GPU Memory. It's important to use these when GPU memory may not be continuous @@ -145,44 +98,47 @@ public: void FlushRegion(GPUVAddr gpu_addr, size_t size) const; private: - [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const; - void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size); - GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size); [[nodiscard]] std::optional FindFreeRange(std::size_t size, std::size_t align, bool start_32bit_address = false) const; - void TryLockPage(PageEntry page_entry, std::size_t size); - void TryUnlockPage(PageEntry page_entry, std::size_t size); - void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, bool is_safe) const; void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, bool is_safe); - [[nodiscard]] static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) { + [[nodiscard]] inline std::size_t PageEntryIndex(GPUVAddr gpu_addr) const { return (gpu_addr >> page_bits) & page_table_mask; } - static constexpr u64 address_space_size = 1ULL << 40; - static constexpr u64 address_space_start = 1ULL << 32; - static constexpr u64 address_space_start_low = 1ULL << 16; - static constexpr u64 page_bits{16}; - static constexpr u64 page_size{1 << page_bits}; - static constexpr u64 page_mask{page_size - 1}; - static constexpr u64 page_table_bits{24}; - static constexpr u64 page_table_size{1 << page_table_bits}; - static constexpr u64 page_table_mask{page_table_size - 1}; - Core::System& system; + const u64 address_space_bits; + const u64 page_bits; + u64 address_space_size; + u64 allocate_start; + u64 page_size; + u64 page_mask; + u64 page_table_mask; + static constexpr u64 cpu_page_bits{12}; + VideoCore::RasterizerInterface* rasterizer = nullptr; - std::vector page_table; + enum class EntryType : u64 { + Free = 0, + Reserved = 1, + Mapped = 2, + }; + + std::vector entries; + + template + GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size); + + EntryType GetEntry(size_t position) const; - using MapRange = std::pair; - std::vector map_ranges; + void SetEntry(size_t position, EntryType entry); - std::vector> cache_invalidate_queue; + Common::MultiLevelPageTable page_table; }; } // namespace Tegra -- cgit v1.2.3 From feb49c822d9cabc5bc7be9eab1f2bf4ba460176a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 14 Nov 2021 20:55:52 +0100 Subject: NVDRV: Remake ASGPU --- src/common/CMakeLists.txt | 2 + src/common/address_space.cpp | 11 + src/common/address_space.h | 134 ++++++ src/common/address_space.inc | 338 +++++++++++++++ .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 460 ++++++++++++++------- src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 163 ++++---- src/video_core/memory_manager.cpp | 10 +- src/video_core/memory_manager.h | 3 +- 8 files changed, 882 insertions(+), 239 deletions(-) create mode 100644 src/common/address_space.cpp create mode 100644 src/common/address_space.h create mode 100644 src/common/address_space.inc (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2db414819..a02696873 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -17,6 +17,8 @@ endif () include(GenerateSCMRev) add_library(common STATIC + address_space.cpp + address_space.h algorithm.h alignment.h announce_multiplayer_room.h diff --git a/src/common/address_space.cpp b/src/common/address_space.cpp new file mode 100644 index 000000000..6db85be87 --- /dev/null +++ b/src/common/address_space.cpp @@ -0,0 +1,11 @@ +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) +// Licensed under GPLv3 or any later version +// Refer to the license.txt file included. + +#include "common/address_space.inc" + +namespace Common { + +template class Common::FlatAllocator; + +} diff --git a/src/common/address_space.h b/src/common/address_space.h new file mode 100644 index 000000000..fd2f32b7d --- /dev/null +++ b/src/common/address_space.h @@ -0,0 +1,134 @@ +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) +// Licensed under GPLv3 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include + +#include "common/common_types.h" + +namespace Common { +template +concept AddressSpaceValid = std::is_unsigned_v && sizeof(VaType) * 8 >= AddressSpaceBits; + +struct EmptyStruct {}; + +/** + * @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector + */ +template +requires AddressSpaceValid class FlatAddressSpaceMap { +private: + std::function + unmapCallback{}; //!< Callback called when the mappings in an region have changed + +protected: + /** + * @brief Represents a block of memory in the AS, the physical mapping is contiguous until + * another block with a different phys address is hit + */ + struct Block { + VaType virt{UnmappedVa}; //!< VA of the block + PaType phys{UnmappedPa}; //!< PA of the block, will increase 1-1 with VA until a new block + //!< is encountered + [[no_unique_address]] ExtraBlockInfo extraInfo; + + Block() = default; + + Block(VaType virt, PaType phys, ExtraBlockInfo extraInfo) + : virt(virt), phys(phys), extraInfo(extraInfo) {} + + constexpr bool Valid() { + return virt != UnmappedVa; + } + + constexpr bool Mapped() { + return phys != UnmappedPa; + } + + constexpr bool Unmapped() { + return phys == UnmappedPa; + } + + bool operator<(const VaType& pVirt) const { + return virt < pVirt; + } + }; + + std::mutex blockMutex; + std::vector blocks{Block{}}; + + /** + * @brief Maps a PA range into the given AS region + * @note blockMutex MUST be locked when calling this + */ + void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo); + + /** + * @brief Unmaps the given range and merges it with other unmapped regions + * @note blockMutex MUST be locked when calling this + */ + void UnmapLocked(VaType virt, VaType size); + +public: + static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) + + ((1ULL << (AddressSpaceBits - 1)) - + 1)}; //!< The maximum VA that this AS can technically reach + + VaType vaLimit{VaMaximum}; //!< A soft limit on the maximum VA of the AS + + FlatAddressSpaceMap(VaType vaLimit, std::function unmapCallback = {}); + + FlatAddressSpaceMap() = default; + + void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo = {}) { + std::scoped_lock lock(blockMutex); + MapLocked(virt, phys, size, extraInfo); + } + + void Unmap(VaType virt, VaType size) { + std::scoped_lock lock(blockMutex); + UnmapLocked(virt, size); + } +}; + +/** + * @brief FlatMemoryManager specialises FlatAddressSpaceMap to work as an allocator, with an + * initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block + */ +template +requires AddressSpaceValid class FlatAllocator + : public FlatAddressSpaceMap { +private: + using Base = FlatAddressSpaceMap; + + VaType currentLinearAllocEnd; //!< The end address for the initial linear allocation pass, once + //!< this reaches the AS limit the slower allocation path will be + //!< used + +public: + VaType vaStart; //!< The base VA of the allocator, no allocations will be below this + + FlatAllocator(VaType vaStart, VaType vaLimit = Base::VaMaximum); + + /** + * @brief Allocates a region in the AS of the given size and returns its address + */ + VaType Allocate(VaType size); + + /** + * @brief Marks the given region in the AS as allocated + */ + void AllocateFixed(VaType virt, VaType size); + + /** + * @brief Frees an AS region so it can be used again + */ + void Free(VaType virt, VaType size); +}; +} // namespace Common diff --git a/src/common/address_space.inc b/src/common/address_space.inc new file mode 100644 index 000000000..907c55d88 --- /dev/null +++ b/src/common/address_space.inc @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPLv3 or later +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include "common/address_space.h" +#include "common/assert.h" + +#define MAP_MEMBER(returnType) \ + template \ + requires AddressSpaceValid returnType FlatAddressSpaceMap< \ + VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo> +#define MAP_MEMBER_CONST() \ + template \ + requires AddressSpaceValid FlatAddressSpaceMap< \ + VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo> + +#define MM_MEMBER(returnType) \ + template \ + requires AddressSpaceValid returnType \ + FlatMemoryManager + +#define ALLOC_MEMBER(returnType) \ + template \ + requires AddressSpaceValid returnType \ + FlatAllocator +#define ALLOC_MEMBER_CONST() \ + template \ + requires AddressSpaceValid \ + FlatAllocator + +namespace Common { +MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType vaLimit, + std::function unmapCallback) + : unmapCallback(std::move(unmapCallback)), vaLimit(vaLimit) { + if (vaLimit > VaMaximum) + UNREACHABLE_MSG("Invalid VA limit!"); +} + +MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo) { + VaType virtEnd{virt + size}; + + if (virtEnd > vaLimit) + UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}", + virtEnd, vaLimit); + + auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)}; + if (blockEndSuccessor == blocks.begin()) + UNREACHABLE_MSG("Trying to map a block before the VA start: virtEnd: 0x{:X}", virtEnd); + + auto blockEndPredecessor{std::prev(blockEndSuccessor)}; + + if (blockEndSuccessor != blocks.end()) { + // We have blocks in front of us, if one is directly in front then we don't have to add a + // tail + if (blockEndSuccessor->virt != virtEnd) { + PaType tailPhys{[&]() -> PaType { + if constexpr (!PaContigSplit) { + return blockEndPredecessor + ->phys; // Always propagate unmapped regions rather than calculating offset + } else { + if (blockEndPredecessor->Unmapped()) + return blockEndPredecessor->phys; // Always propagate unmapped regions + // rather than calculating offset + else + return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt; + } + }()}; + + if (blockEndPredecessor->virt >= virt) { + // If this block's start would be overlapped by the map then reuse it as a tail + // block + blockEndPredecessor->virt = virtEnd; + blockEndPredecessor->phys = tailPhys; + blockEndPredecessor->extraInfo = blockEndPredecessor->extraInfo; + + // No longer predecessor anymore + blockEndSuccessor = blockEndPredecessor--; + } else { + // Else insert a new one and we're done + blocks.insert(blockEndSuccessor, + {Block(virt, phys, extraInfo), + Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)}); + if (unmapCallback) + unmapCallback(virt, size); + + return; + } + } + } else { + // blockEndPredecessor will always be unmapped as blocks has to be terminated by an unmapped + // chunk + if (blockEndPredecessor != blocks.begin() && blockEndPredecessor->virt >= virt) { + // Move the unmapped block start backwards + blockEndPredecessor->virt = virtEnd; + + // No longer predecessor anymore + blockEndSuccessor = blockEndPredecessor--; + } else { + // Else insert a new one and we're done + blocks.insert(blockEndSuccessor, + {Block(virt, phys, extraInfo), Block(virtEnd, UnmappedPa, {})}); + if (unmapCallback) + unmapCallback(virt, size); + + return; + } + } + + auto blockStartSuccessor{blockEndSuccessor}; + + // Walk the block vector to find the start successor as this is more efficient than another + // binary search in most scenarios + while (std::prev(blockStartSuccessor)->virt >= virt) + blockStartSuccessor--; + + // Check that the start successor is either the end block or something in between + if (blockStartSuccessor->virt > virtEnd) { + UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt); + } else if (blockStartSuccessor->virt == virtEnd) { + // We need to create a new block as there are none spare that we would overwrite + blocks.insert(blockStartSuccessor, Block(virt, phys, extraInfo)); + } else { + // Erase overwritten blocks + if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor) + blocks.erase(eraseStart, blockEndSuccessor); + + // Reuse a block that would otherwise be overwritten as a start block + blockStartSuccessor->virt = virt; + blockStartSuccessor->phys = phys; + blockStartSuccessor->extraInfo = extraInfo; + } + + if (unmapCallback) + unmapCallback(virt, size); +} + +MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { + VaType virtEnd{virt + size}; + + if (virtEnd > vaLimit) + UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}", + virtEnd, vaLimit); + + auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)}; + if (blockEndSuccessor == blocks.begin()) + UNREACHABLE_MSG("Trying to unmap a block before the VA start: virtEnd: 0x{:X}", virtEnd); + + auto blockEndPredecessor{std::prev(blockEndSuccessor)}; + + auto walkBackToPredecessor{[&](auto iter) { + while (iter->virt >= virt) + iter--; + + return iter; + }}; + + auto eraseBlocksWithEndUnmapped{[&](auto unmappedEnd) { + auto blockStartPredecessor{walkBackToPredecessor(unmappedEnd)}; + auto blockStartSuccessor{std::next(blockStartPredecessor)}; + + auto eraseEnd{[&]() { + if (blockStartPredecessor->Unmapped()) { + // If the start predecessor is unmapped then we can erase everything in our region + // and be done + return std::next(unmappedEnd); + } else { + // Else reuse the end predecessor as the start of our unmapped region then erase all + // up to it + unmappedEnd->virt = virt; + return unmappedEnd; + } + }()}; + + // We can't have two unmapped regions after each other + if (eraseEnd != blocks.end() && + (eraseEnd == blockStartSuccessor || + (blockStartPredecessor->Unmapped() && eraseEnd->Unmapped()))) + UNREACHABLE_MSG("Multiple contiguous unmapped regions are unsupported!"); + + blocks.erase(blockStartSuccessor, eraseEnd); + }}; + + // We can avoid any splitting logic if these are the case + if (blockEndPredecessor->Unmapped()) { + if (blockEndPredecessor->virt > virt) + eraseBlocksWithEndUnmapped(blockEndPredecessor); + + if (unmapCallback) + unmapCallback(virt, size); + + return; // The region is unmapped, bail out early + } else if (blockEndSuccessor->virt == virtEnd && blockEndSuccessor->Unmapped()) { + eraseBlocksWithEndUnmapped(blockEndSuccessor); + + if (unmapCallback) + unmapCallback(virt, size); + + return; // The region is unmapped here and doesn't need splitting, bail out early + } else if (blockEndSuccessor == blocks.end()) { + // This should never happen as the end should always follow an unmapped block + UNREACHABLE_MSG("Unexpected Memory Manager state!"); + } else if (blockEndSuccessor->virt != virtEnd) { + // If one block is directly in front then we don't have to add a tail + + // The previous block is mapped so we will need to add a tail with an offset + PaType tailPhys{[&]() { + if constexpr (PaContigSplit) + return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt; + else + return blockEndPredecessor->phys; + }()}; + + if (blockEndPredecessor->virt >= virt) { + // If this block's start would be overlapped by the unmap then reuse it as a tail block + blockEndPredecessor->virt = virtEnd; + blockEndPredecessor->phys = tailPhys; + + // No longer predecessor anymore + blockEndSuccessor = blockEndPredecessor--; + } else { + blocks.insert(blockEndSuccessor, + {Block(virt, UnmappedPa, {}), + Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)}); + if (unmapCallback) + unmapCallback(virt, size); + + return; // The previous block is mapped and ends before + } + } + + // Walk the block vector to find the start predecessor as this is more efficient than another + // binary search in most scenarios + auto blockStartPredecessor{walkBackToPredecessor(blockEndSuccessor)}; + auto blockStartSuccessor{std::next(blockStartPredecessor)}; + + if (blockStartSuccessor->virt > virtEnd) { + UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt); + } else if (blockStartSuccessor->virt == virtEnd) { + // There are no blocks between the start and the end that would let us skip inserting a new + // one for head + + // The previous block is may be unmapped, if so we don't need to insert any unmaps after it + if (blockStartPredecessor->Mapped()) + blocks.insert(blockStartSuccessor, Block(virt, UnmappedPa, {})); + } else if (blockStartPredecessor->Unmapped()) { + // If the previous block is unmapped + blocks.erase(blockStartSuccessor, blockEndPredecessor); + } else { + // Erase overwritten blocks, skipping the first one as we have written the unmapped start + // block there + if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor) + blocks.erase(eraseStart, blockEndSuccessor); + + // Add in the unmapped block header + blockStartSuccessor->virt = virt; + blockStartSuccessor->phys = UnmappedPa; + } + + if (unmapCallback) + unmapCallback(virt, size); +} + +ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart, VaType vaLimit) + : Base(vaLimit), currentLinearAllocEnd(vaStart), vaStart(vaStart) {} + +ALLOC_MEMBER(VaType)::Allocate(VaType size) { + std::scoped_lock lock(this->blockMutex); + + VaType allocStart{UnmappedVa}; + VaType allocEnd{currentLinearAllocEnd + size}; + + // Avoid searching backwards in the address space if possible + if (allocEnd >= currentLinearAllocEnd && allocEnd <= this->vaLimit) { + auto allocEndSuccessor{ + std::lower_bound(this->blocks.begin(), this->blocks.end(), allocEnd)}; + if (allocEndSuccessor == this->blocks.begin()) + UNREACHABLE_MSG("First block in AS map is invalid!"); + + auto allocEndPredecessor{std::prev(allocEndSuccessor)}; + if (allocEndPredecessor->virt <= currentLinearAllocEnd) { + allocStart = currentLinearAllocEnd; + } else { + // Skip over fixed any mappings in front of us + while (allocEndSuccessor != this->blocks.end()) { + if (allocEndSuccessor->virt - allocEndPredecessor->virt < size || + allocEndPredecessor->Mapped()) { + allocStart = allocEndPredecessor->virt; + break; + } + + allocEndPredecessor = allocEndSuccessor++; + + // Use the VA limit to calculate if we can fit in the final block since it has no + // successor + if (allocEndSuccessor == this->blocks.end()) { + allocEnd = allocEndPredecessor->virt + size; + + if (allocEnd >= allocEndPredecessor->virt && allocEnd <= this->vaLimit) + allocStart = allocEndPredecessor->virt; + } + } + } + } + + if (allocStart != UnmappedVa) { + currentLinearAllocEnd = allocStart + size; + } else { // If linear allocation overflows the AS then find a gap + if (this->blocks.size() <= 2) + UNREACHABLE_MSG("Unexpected allocator state!"); + + auto searchPredecessor{this->blocks.begin()}; + auto searchSuccessor{std::next(searchPredecessor)}; + + while (searchSuccessor != this->blocks.end() && + (searchSuccessor->virt - searchPredecessor->virt < size || + searchPredecessor->Mapped())) { + searchPredecessor = searchSuccessor++; + } + + if (searchSuccessor != this->blocks.end()) + allocStart = searchPredecessor->virt; + else + return {}; // AS is full + } + + this->MapLocked(allocStart, true, size, {}); + return allocStart; +} + +ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) { + this->Map(virt, true, size); +} + +ALLOC_MEMBER(void)::Free(VaType virt, VaType size) { + this->Unmap(virt, size); +} +} // namespace Common diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 5c70c9a57..344ddfc90 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -6,6 +6,7 @@ #include #include +#include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" @@ -21,8 +22,8 @@ namespace Service::Nvidia::Devices { nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core) - : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, - gmmu{std::make_shared(system)} {} + : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, vm{}, + gmmu{} {} nvhost_as_gpu::~nvhost_as_gpu() = default; NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector& input, @@ -89,12 +90,49 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector& input, std::vector& IoctlAllocAsEx params{}; std::memcpy(¶ms, input.data(), input.size()); - LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x{:X}", params.big_page_size); - if (params.big_page_size == 0) { - params.big_page_size = DEFAULT_BIG_PAGE_SIZE; + LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size); + + std::scoped_lock lock(mutex); + + if (vm.initialised) { + UNREACHABLE_MSG("Cannot initialise an address space twice!"); + return NvResult::InvalidState; } - big_page_size = params.big_page_size; + if (params.big_page_size) { + if (!std::has_single_bit(params.big_page_size)) { + LOG_ERROR(Service_NVDRV, "Non power-of-2 big page size: 0x{:X}!", params.big_page_size); + return NvResult::BadValue; + } + + if (!(params.big_page_size & VM::SUPPORTED_BIG_PAGE_SIZES)) { + LOG_ERROR(Service_NVDRV, "Unsupported big page size: 0x{:X}!", params.big_page_size); + return NvResult::BadValue; + } + + vm.big_page_size = params.big_page_size; + vm.big_page_size_bits = static_cast(std::countr_zero(params.big_page_size)); + + vm.va_range_start = params.big_page_size << VM::VA_START_SHIFT; + } + + // If this is unspecified then default values should be used + if (params.va_range_start) { + vm.va_range_start = params.va_range_start; + vm.va_range_split = params.va_range_split; + vm.va_range_end = params.va_range_end; + } + + const u64 start_pages{vm.va_range_start >> VM::PAGE_SIZE_BITS}; + const u64 end_pages{vm.va_range_split >> VM::PAGE_SIZE_BITS}; + vm.small_page_allocator = std::make_shared(start_pages, end_pages); + + const u64 start_big_pages{vm.va_range_split >> vm.big_page_size_bits}; + const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits}; + vm.big_page_allocator = std::make_unique(start_big_pages, end_big_pages); + + gmmu = std::make_shared(system, 40, VM::PAGE_SIZE_BITS); + vm.initialised = true; return NvResult::Success; } @@ -106,21 +144,73 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector& input, std::vector< LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, params.page_size, params.flags); - const auto size{static_cast(params.pages) * static_cast(params.page_size)}; - if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) { - params.offset = *(gmmu->AllocateFixed(params.offset, size)); + std::scoped_lock lock(mutex); + + if (!vm.initialised) { + return NvResult::BadValue; + } + + if (params.page_size != VM::YUZU_PAGESIZE && params.page_size != vm.big_page_size) { + return NvResult::BadValue; + } + + if (params.page_size != vm.big_page_size && + ((params.flags & MappingFlags::Sparse) != MappingFlags::None)) { + UNIMPLEMENTED_MSG("Sparse small pages are not implemented!"); + return NvResult::NotImplemented; + } + + const u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS + : vm.big_page_size_bits}; + + auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator + : *vm.big_page_allocator}; + + if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) { + allocator.AllocateFixed(static_cast(params.offset >> page_size_bits), params.pages); } else { - params.offset = gmmu->Allocate(size, params.align); + params.offset = static_cast(allocator.Allocate(params.pages)) << page_size_bits; + if (!params.offset) { + UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!"); + return NvResult::InsufficientMemory; + } } - auto result = NvResult::Success; - if (!params.offset) { - LOG_CRITICAL(Service_NVDRV, "allocation failed for size {}", size); - result = NvResult::InsufficientMemory; + u64 size{static_cast(params.pages) * params.page_size}; + + if ((params.flags & MappingFlags::Sparse) != MappingFlags::None) { + gmmu->MapSparse(params.offset, size); } + allocation_map[params.offset] = { + .size = size, + .page_size = params.page_size, + .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None, + }; + std::memcpy(output.data(), ¶ms, output.size()); - return result; + return NvResult::Success; +} + +void nvhost_as_gpu::FreeMappingLocked(u64 offset) { + auto mapping{mapping_map.at(offset)}; + + if (!mapping->fixed) { + auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; + u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; + + allocator.Free(static_cast(mapping->offset >> page_size_bits), + static_cast(mapping->size >> page_size_bits)); + } + + // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state + // Only FreeSpace can unmap them fully + if (mapping->sparse_alloc) + gmmu->MapSparse(offset, mapping->size); + else + gmmu->Unmap(offset, mapping->size); + + mapping_map.erase(offset); } NvResult nvhost_as_gpu::FreeSpace(const std::vector& input, std::vector& output) { @@ -130,7 +220,40 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector& input, std::vector& LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset, params.pages, params.page_size); - gmmu->Unmap(params.offset, static_cast(params.pages) * params.page_size); + std::scoped_lock lock(mutex); + + if (!vm.initialised) { + return NvResult::BadValue; + } + + try { + auto allocation{allocation_map[params.offset]}; + + if (allocation.page_size != params.page_size || + allocation.size != (static_cast(params.pages) * params.page_size)) { + return NvResult::BadValue; + } + + for (const auto& mapping : allocation.mappings) { + FreeMappingLocked(mapping->offset); + } + + // Unset sparse flag if required + if (allocation.sparse) { + gmmu->Unmap(params.offset, allocation.size); + } + + auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator + : *vm.big_page_allocator}; + u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS + : vm.big_page_size_bits}; + + allocator.Free(static_cast(params.offset >> page_size_bits), + static_cast(allocation.size >> page_size_bits)); + allocation_map.erase(params.offset); + } catch ([[maybe_unused]] const std::out_of_range& e) { + return NvResult::BadValue; + } std::memcpy(output.data(), ¶ms, output.size()); return NvResult::Success; @@ -141,43 +264,51 @@ NvResult nvhost_as_gpu::Remap(const std::vector& input, std::vector& out LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries); - auto result = NvResult::Success; std::vector entries(num_entries); std::memcpy(entries.data(), input.data(), input.size()); + std::scoped_lock lock(mutex); + + if (!vm.initialised) { + return NvResult::BadValue; + } + for (const auto& entry : entries) { - LOG_DEBUG(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", - entry.offset, entry.nvmap_handle, entry.pages); - - if (entry.nvmap_handle == 0) { - // If nvmap handle is null, we should unmap instead. - const auto offset{static_cast(entry.offset) << 0x10}; - const auto size{static_cast(entry.pages) << 0x10}; - gmmu->Unmap(offset, size); - continue; + GPUVAddr virtual_address{static_cast(entry.as_offset_big_pages) + << vm.big_page_size_bits}; + u64 size{static_cast(entry.big_pages) << vm.big_page_size_bits}; + + auto alloc{allocation_map.upper_bound(virtual_address)}; + + if (alloc-- == allocation_map.begin() || + (virtual_address - alloc->first) + size > alloc->second.size) { + LOG_WARNING(Service_NVDRV, "Cannot remap into an unallocated region!"); + return NvResult::BadValue; } - const auto object{nvmap.GetHandle(entry.nvmap_handle)}; - if (!object) { - LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", entry.nvmap_handle); - result = NvResult::InvalidState; - break; + if (!alloc->second.sparse) { + LOG_WARNING(Service_NVDRV, "Cannot remap a non-sparse mapping!"); + return NvResult::BadValue; } - const auto offset{static_cast(entry.offset) << 0x10}; - const auto size{static_cast(entry.pages) << 0x10}; - const auto map_offset{static_cast(entry.map_offset) << 0x10}; - const auto addr{gmmu->Map(object->address + map_offset, offset, size)}; + if (!entry.handle) { + gmmu->MapSparse(virtual_address, size); + } else { + auto handle{nvmap.GetHandle(entry.handle)}; + if (!handle) { + return NvResult::BadValue; + } - if (!addr) { - LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!"); - result = NvResult::InvalidState; - break; + VAddr cpu_address{static_cast( + handle->address + + (static_cast(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; + + gmmu->Map(virtual_address, cpu_address, size); } } std::memcpy(output.data(), entries.data(), output.size()); - return result; + return NvResult::Success; } NvResult nvhost_as_gpu::MapBufferEx(const std::vector& input, std::vector& output) { @@ -187,75 +318,96 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector& input, std::vector(buffer_map->CpuAddr() + params.buffer_offset)}; - const auto gpu_addr{static_cast(params.offset + params.buffer_offset)}; + std::scoped_lock lock(mutex); + + if (!vm.initialised) { + return NvResult::BadValue; + } - if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) { - LOG_CRITICAL(Service_NVDRV, - "remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, " - "mapping_size = {}, offset={}", - params.flags, params.nvmap_handle, params.buffer_offset, - params.mapping_size, params.offset); + // Remaps a subregion of an existing mapping to a different PA + if ((params.flags & MappingFlags::Remap) != MappingFlags::None) { + try { + auto mapping{mapping_map.at(params.offset)}; - std::memcpy(output.data(), ¶ms, output.size()); - return NvResult::InvalidState; + if (mapping->size < params.mapping_size) { + LOG_WARNING(Service_NVDRV, + "Cannot remap a partially mapped GPU address space region: 0x{:X}", + params.offset); + return NvResult::BadValue; } - std::memcpy(output.data(), ¶ms, output.size()); - return NvResult::Success; - } else { - LOG_CRITICAL(Service_NVDRV, "address not mapped offset={}", params.offset); + u64 gpu_address{static_cast(params.offset + params.buffer_offset)}; + VAddr cpu_address{mapping->ptr + params.buffer_offset}; + + gmmu->Map(gpu_address, cpu_address, params.mapping_size); - std::memcpy(output.data(), ¶ms, output.size()); - return NvResult::InvalidState; + return NvResult::Success; + } catch ([[maybe_unused]] const std::out_of_range& e) { + LOG_WARNING(Service_NVDRV, "Cannot remap an unmapped GPU address space region: 0x{:X}", + params.offset); + return NvResult::BadValue; } } - const auto object{nvmap.GetHandle(params.nvmap_handle)}; - if (!object) { - LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", params.nvmap_handle); - std::memcpy(output.data(), ¶ms, output.size()); - return NvResult::InvalidState; + auto handle{nvmap.GetHandle(params.handle)}; + if (!handle) { + return NvResult::BadValue; } - // The real nvservices doesn't make a distinction between handles and ids, and - // object can only have one handle and it will be the same as its id. Assert that this is the - // case to prevent unexpected behavior. - ASSERT(object->id == params.nvmap_handle); + VAddr cpu_address{static_cast(handle->address + params.buffer_offset)}; + u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; - u64 page_size{params.page_size}; - if (!page_size) { - page_size = object->align; - } + if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) { + auto alloc{allocation_map.upper_bound(params.offset)}; - const auto physical_address{object->address + params.buffer_offset}; - u64 size{params.mapping_size}; - if (!size) { - size = object->size; - } + if (alloc-- == allocation_map.begin() || + (params.offset - alloc->first) + size > alloc->second.size) { + UNREACHABLE_MSG("Cannot perform a fixed mapping into an unallocated region!"); + return NvResult::BadValue; + } - const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None}; - if (is_alloc) { - params.offset = gmmu->MapAllocate(physical_address, size, page_size); - } else { - params.offset = gmmu->Map(physical_address, params.offset, size); - } + gmmu->Map(params.offset, cpu_address, size); - auto result = NvResult::Success; - if (!params.offset) { - LOG_CRITICAL(Service_NVDRV, "failed to map size={}", size); - result = NvResult::InvalidState; + auto mapping{std::make_shared(cpu_address, params.offset, size, true, false, + alloc->second.sparse)}; + alloc->second.mappings.push_back(mapping); + mapping_map[params.offset] = mapping; } else { - AddBufferMap(params.offset, size, physical_address, is_alloc); + bool big_page{[&]() { + if (Common::IsAligned(handle->align, vm.big_page_size)) + return true; + else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE)) + return false; + else { + UNREACHABLE(); + return false; + } + }()}; + + auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; + u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; + u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; + + params.offset = static_cast(allocator.Allocate( + static_cast(Common::AlignUp(size, page_size) >> page_size_bits))) + << page_size_bits; + if (!params.offset) { + UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!"); + return NvResult::InsufficientMemory; + } + + gmmu->Map(params.offset, cpu_address, size); + + auto mapping{ + std::make_shared(cpu_address, params.offset, size, false, big_page, false)}; + mapping_map[params.offset] = mapping; } std::memcpy(output.data(), ¶ms, output.size()); - return result; + return NvResult::Success; } NvResult nvhost_as_gpu::UnmapBuffer(const std::vector& input, std::vector& output) { @@ -264,13 +416,36 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector& input, std::vectorUnmap(params.offset, *size); - } else { - LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset); + std::scoped_lock lock(mutex); + + if (!vm.initialised) { + return NvResult::BadValue; + } + + try { + auto mapping{mapping_map.at(params.offset)}; + + if (!mapping->fixed) { + auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; + u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; + + allocator.Free(static_cast(mapping->offset >> page_size_bits), + static_cast(mapping->size >> page_size_bits)); + } + + // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state + // Only FreeSpace can unmap them fully + if (mapping->sparse_alloc) { + gmmu->MapSparse(params.offset, mapping->size); + } else { + gmmu->Unmap(params.offset, mapping->size); + } + + mapping_map.erase(params.offset); + } catch ([[maybe_unused]] const std::out_of_range& e) { + LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); } - std::memcpy(output.data(), ¶ms, output.size()); return NvResult::Success; } @@ -284,28 +459,37 @@ NvResult nvhost_as_gpu::BindChannel(const std::vector& input, std::vector{ + VaRegion{ + .offset = vm.small_page_allocator->vaStart << VM::PAGE_SIZE_BITS, + .page_size = VM::YUZU_PAGESIZE, + .pages = vm.small_page_allocator->vaLimit - vm.small_page_allocator->vaStart, + }, + VaRegion{ + .offset = vm.big_page_allocator->vaStart << vm.big_page_size_bits, + .page_size = vm.big_page_size, + .pages = vm.big_page_allocator->vaLimit - vm.big_page_allocator->vaStart, + }, + }; +} + NvResult nvhost_as_gpu::GetVARegions(const std::vector& input, std::vector& output) { IoctlGetVaRegions params{}; std::memcpy(¶ms, input.data(), input.size()); - LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr, - params.buf_size); - - params.buf_size = 0x30; + LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr, + params.buf_size); - params.small = IoctlVaRegion{ - .offset = 0x04000000, - .page_size = DEFAULT_SMALL_PAGE_SIZE, - .pages = 0x3fbfff, - }; + std::scoped_lock lock(mutex); - params.big = IoctlVaRegion{ - .offset = 0x04000000, - .page_size = big_page_size, - .pages = 0x1bffff, - }; + if (!vm.initialised) { + return NvResult::BadValue; + } - // TODO(ogniK): This probably can stay stubbed but should add support way way later + GetVARegionsImpl(params); std::memcpy(output.data(), ¶ms, output.size()); return NvResult::Success; @@ -316,64 +500,24 @@ NvResult nvhost_as_gpu::GetVARegions(const std::vector& input, std::vector nvhost_as_gpu::FindBufferMap(GPUVAddr gpu_addr) const { - const auto end{buffer_mappings.upper_bound(gpu_addr)}; - for (auto iter{buffer_mappings.begin()}; iter != end; ++iter) { - if (gpu_addr >= iter->second.StartAddr() && gpu_addr < iter->second.EndAddr()) { - return iter->second; - } - } - - return std::nullopt; -} - -void nvhost_as_gpu::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, - bool is_allocated) { - buffer_mappings[gpu_addr] = {gpu_addr, size, cpu_addr, is_allocated}; -} - -std::optional nvhost_as_gpu::RemoveBufferMap(GPUVAddr gpu_addr) { - if (const auto iter{buffer_mappings.find(gpu_addr)}; iter != buffer_mappings.end()) { - std::size_t size{}; - - if (iter->second.IsAllocated()) { - size = iter->second.Size(); - } - - buffer_mappings.erase(iter); - - return size; - } - - return std::nullopt; -} - Kernel::KEvent* nvhost_as_gpu::QueryEvent(u32 event_id) { LOG_CRITICAL(Service_NVDRV, "Unknown AS GPU Event {}", event_id); return nullptr; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index f5fb33ba7..1d27739e2 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h @@ -5,14 +5,19 @@ #pragma once +#include +#include #include #include +#include #include #include +#include "common/address_space.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "common/swap.h" +#include "core/hle/service/nvdrv/core/nvmap.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" namespace Tegra { @@ -30,17 +35,13 @@ class NvMap; namespace Service::Nvidia::Devices { -constexpr u32 DEFAULT_BIG_PAGE_SIZE = 1 << 16; -constexpr u32 DEFAULT_SMALL_PAGE_SIZE = 1 << 12; - -class nvmap; - -enum class AddressSpaceFlags : u32 { - None = 0x0, - FixedOffset = 0x1, - Remap = 0x100, +enum class MappingFlags : u32 { + None = 0, + Fixed = 1 << 0, + Sparse = 1 << 1, + Remap = 1 << 8, }; -DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags); +DECLARE_ENUM_FLAG_OPERATORS(MappingFlags); class nvhost_as_gpu final : public nvdevice { public: @@ -59,46 +60,15 @@ public: Kernel::KEvent* QueryEvent(u32 event_id) override; -private: - class BufferMap final { - public: - constexpr BufferMap() = default; - - constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_) - : start_addr{start_addr_}, end_addr{start_addr_ + size_} {} - - constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_, VAddr cpu_addr_, - bool is_allocated_) - : start_addr{start_addr_}, end_addr{start_addr_ + size_}, cpu_addr{cpu_addr_}, - is_allocated{is_allocated_} {} - - constexpr VAddr StartAddr() const { - return start_addr; - } - - constexpr VAddr EndAddr() const { - return end_addr; - } - - constexpr std::size_t Size() const { - return end_addr - start_addr; - } - - constexpr VAddr CpuAddr() const { - return cpu_addr; - } - - constexpr bool IsAllocated() const { - return is_allocated; - } - - private: - GPUVAddr start_addr{}; - GPUVAddr end_addr{}; - VAddr cpu_addr{}; - bool is_allocated{}; + struct VaRegion { + u64 offset; + u32 page_size; + u32 _pad0_; + u64 pages; }; + static_assert(sizeof(VaRegion) == 0x18); +private: struct IoctlAllocAsEx { u32_le flags{}; // usually passes 1 s32_le as_fd{}; // ignored; passes 0 @@ -113,7 +83,7 @@ private: struct IoctlAllocSpace { u32_le pages{}; u32_le page_size{}; - AddressSpaceFlags flags{}; + MappingFlags flags{}; INSERT_PADDING_WORDS(1); union { u64_le offset; @@ -130,19 +100,19 @@ private: static_assert(sizeof(IoctlFreeSpace) == 16, "IoctlFreeSpace is incorrect size"); struct IoctlRemapEntry { - u16_le flags{}; - u16_le kind{}; - u32_le nvmap_handle{}; - u32_le map_offset{}; - u32_le offset{}; - u32_le pages{}; + u16 flags; + u16 kind; + NvCore::NvMap::Handle::Id handle; + u32 handle_offset_big_pages; + u32 as_offset_big_pages; + u32 big_pages; }; static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size"); struct IoctlMapBufferEx { - AddressSpaceFlags flags{}; // bit0: fixed_offset, bit2: cacheable - u32_le kind{}; // -1 is default - u32_le nvmap_handle{}; + MappingFlags flags{}; // bit0: fixed_offset, bit2: cacheable + u32_le kind{}; // -1 is default + NvCore::NvMap::Handle::Id handle; u32_le page_size{}; // 0 means don't care s64_le buffer_offset{}; u64_le mapping_size{}; @@ -160,27 +130,15 @@ private: }; static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size"); - struct IoctlVaRegion { - u64_le offset{}; - u32_le page_size{}; - INSERT_PADDING_WORDS(1); - u64_le pages{}; - }; - static_assert(sizeof(IoctlVaRegion) == 24, "IoctlVaRegion is incorrect size"); - struct IoctlGetVaRegions { u64_le buf_addr{}; // (contained output user ptr on linux, ignored) u32_le buf_size{}; // forced to 2*sizeof(struct va_region) u32_le reserved{}; - IoctlVaRegion small{}; - IoctlVaRegion big{}; + std::array regions{}; }; - static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2, + static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2, "IoctlGetVaRegions is incorrect size"); - s32 channel{}; - u32 big_page_size{DEFAULT_BIG_PAGE_SIZE}; - NvResult AllocAsEx(const std::vector& input, std::vector& output); NvResult AllocateSpace(const std::vector& input, std::vector& output); NvResult Remap(const std::vector& input, std::vector& output); @@ -189,23 +147,74 @@ private: NvResult FreeSpace(const std::vector& input, std::vector& output); NvResult BindChannel(const std::vector& input, std::vector& output); + void GetVARegionsImpl(IoctlGetVaRegions& params); NvResult GetVARegions(const std::vector& input, std::vector& output); NvResult GetVARegions(const std::vector& input, std::vector& output, std::vector& inline_output); - std::optional FindBufferMap(GPUVAddr gpu_addr) const; - void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated); - std::optional RemoveBufferMap(GPUVAddr gpu_addr); + void FreeMappingLocked(u64 offset); Module& module; NvCore::Container& container; NvCore::NvMap& nvmap; + struct Mapping { + VAddr ptr; + u64 offset; + u64 size; + bool fixed; + bool big_page; // Only valid if fixed == false + bool sparse_alloc; + + Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_) + : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_), + sparse_alloc(sparse_alloc_) {} + }; + + struct Allocation { + u64 size; + std::list> mappings; + u32 page_size; + bool sparse; + }; + + std::map> + mapping_map; //!< This maps the base addresses of mapped buffers to their total sizes and + //!< mapping type, this is needed as what was originally a single buffer may + //!< have been split into multiple GPU side buffers with the remap flag. + std::map allocation_map; //!< Holds allocations created by AllocSpace from + //!< which fixed buffers can be mapped into + std::mutex mutex; //!< Locks all AS operations + + struct VM { + static constexpr u32 YUZU_PAGESIZE{0x1000}; + static constexpr u32 PAGE_SIZE_BITS{std::countr_zero(YUZU_PAGESIZE)}; + + static constexpr u32 SUPPORTED_BIG_PAGE_SIZES{0x30000}; + static constexpr u32 DEFAULT_BIG_PAGE_SIZE{0x20000}; + u32 big_page_size{DEFAULT_BIG_PAGE_SIZE}; + u32 big_page_size_bits{std::countr_zero(DEFAULT_BIG_PAGE_SIZE)}; + + static constexpr u32 VA_START_SHIFT{10}; + static constexpr u64 DEFAULT_VA_SPLIT{1ULL << 34}; + static constexpr u64 DEFAULT_VA_RANGE{1ULL << 37}; + u64 va_range_start{DEFAULT_BIG_PAGE_SIZE << VA_START_SHIFT}; + u64 va_range_split{DEFAULT_VA_SPLIT}; + u64 va_range_end{DEFAULT_VA_RANGE}; + + using Allocator = Common::FlatAllocator; + + std::unique_ptr big_page_allocator; + std::shared_ptr + small_page_allocator; //! Shared as this is also used by nvhost::GpuChannel + + bool initialised{}; + } vm; std::shared_ptr gmmu; - // This is expected to be ordered, therefore we must use a map, not unordered_map - std::map buffer_mappings; + // s32 channel{}; + // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE}; }; } // namespace Service::Nvidia::Devices diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 9e946d448..fc68bcc73 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -71,18 +71,22 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) rasterizer = rasterizer_; } -GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) { +GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { return PageTableOp(gpu_addr, cpu_addr, size); } +GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size) { + return PageTableOp(gpu_addr, 0, size); +} + GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) { - return Map(cpu_addr, *FindFreeRange(size, align), size); + return Map(*FindFreeRange(size, align), cpu_addr, size); } GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) { const std::optional gpu_addr = FindFreeRange(size, 1, true); ASSERT(gpu_addr); - return Map(cpu_addr, *gpu_addr, size); + return Map(*gpu_addr, cpu_addr, size); } void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 0a763fd19..b8878476a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -88,7 +88,8 @@ public: std::vector> GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const; - [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); + GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size); + GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); [[nodiscard]] std::optional AllocateFixed(GPUVAddr gpu_addr, std::size_t size); -- cgit v1.2.3 From e462191482c6507daed67802c6c1d2c50f10c96e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 17 Dec 2021 16:45:06 +0100 Subject: Refactor VideoCore to use AS sepparate from Channel. --- src/common/hash.h | 7 ++ src/video_core/CMakeLists.txt | 1 + src/video_core/control/channel_state_cache.cpp | 8 +- src/video_core/control/channel_state_cache.h | 28 ++++- src/video_core/control/channel_state_cache.inc | 23 +++- src/video_core/memory_manager.cpp | 5 +- src/video_core/memory_manager.h | 9 ++ src/video_core/texture_cache/texture_cache.cpp | 16 +++ src/video_core/texture_cache/texture_cache.h | 130 ++++++++-------------- src/video_core/texture_cache/texture_cache_base.h | 96 +++++++--------- 10 files changed, 171 insertions(+), 152 deletions(-) create mode 100644 src/video_core/texture_cache/texture_cache.cpp (limited to 'src/common') diff --git a/src/common/hash.h b/src/common/hash.h index b6f3e6d6f..e8fe78b07 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -18,4 +18,11 @@ struct PairHash { } }; +template +struct IdentityHash { + [[nodiscard]] size_t operator()(T value) const noexcept { + return static_cast(value); + } +}; + } // namespace Common diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e216c51a2..35faa70a0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -203,6 +203,7 @@ add_library(video_core STATIC texture_cache/render_targets.h texture_cache/samples_helper.h texture_cache/slot_vector.h + texture_cache/texture_cache.cpp texture_cache/texture_cache.h texture_cache/texture_cache_base.h texture_cache/types.h diff --git a/src/video_core/control/channel_state_cache.cpp b/src/video_core/control/channel_state_cache.cpp index f72a97b2f..ec7ba907c 100644 --- a/src/video_core/control/channel_state_cache.cpp +++ b/src/video_core/control/channel_state_cache.cpp @@ -1,5 +1,11 @@ #include "video_core/control/channel_state_cache.inc" namespace VideoCommon { + +ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state) + : maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute}, + gpu_memory{*channel_state.memory_manager} {} + template class VideoCommon::ChannelSetupCaches; -} + +} // namespace VideoCommon diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h index c8298c003..c51040c83 100644 --- a/src/video_core/control/channel_state_cache.h +++ b/src/video_core/control/channel_state_cache.h @@ -2,6 +2,7 @@ #include #include +#include #include #include "common/common_types.h" @@ -41,9 +42,10 @@ template class ChannelSetupCaches { public: /// Operations for seting the channel of execution. + virtual ~ChannelSetupCaches(); /// Create channel state. - void CreateChannel(Tegra::Control::ChannelState& channel); + virtual void CreateChannel(Tegra::Control::ChannelState& channel); /// Bind a channel for execution. void BindToChannel(s32 id); @@ -51,18 +53,34 @@ public: /// Erase channel's state. void EraseChannel(s32 id); + Tegra::MemoryManager* GetFromID(size_t id) const { + std::unique_lock lk(config_mutex); + const auto ref = address_spaces.find(id); + return ref->second.gpu_memory; + } + protected: static constexpr size_t UNSET_CHANNEL{std::numeric_limits::max()}; - std::deque

channel_storage; - std::deque free_channel_ids; - std::unordered_map channel_map; - P* channel_state; size_t current_channel_id{UNSET_CHANNEL}; + size_t current_address_space{}; Tegra::Engines::Maxwell3D* maxwell3d; Tegra::Engines::KeplerCompute* kepler_compute; Tegra::MemoryManager* gpu_memory; + + std::deque

channel_storage; + std::deque free_channel_ids; + std::unordered_map channel_map; + struct AddresSpaceRef { + size_t ref_count; + size_t storage_id; + Tegra::MemoryManager* gpu_memory; + }; + std::unordered_map address_spaces; + mutable std::mutex config_mutex; + + virtual void OnGPUASRegister([[maybe_unused]] size_t map_id) {} }; } // namespace VideoCommon diff --git a/src/video_core/control/channel_state_cache.inc b/src/video_core/control/channel_state_cache.inc index 3eb73af9f..185eabc35 100644 --- a/src/video_core/control/channel_state_cache.inc +++ b/src/video_core/control/channel_state_cache.inc @@ -6,18 +6,18 @@ namespace VideoCommon { -ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state) - : maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute}, - gpu_memory{*channel_state.memory_manager} {} +template +ChannelSetupCaches

::~ChannelSetupCaches() = default; template void ChannelSetupCaches

::CreateChannel(struct Tegra::Control::ChannelState& channel) { + std::unique_lock lk(config_mutex); ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0); auto new_id = [this, &channel]() { if (!free_channel_ids.empty()) { auto id = free_channel_ids.front(); free_channel_ids.pop_front(); - new (&channel_storage[id]) ChannelInfo(channel); + new (&channel_storage[id]) P(channel); return id; } channel_storage.emplace_back(channel); @@ -27,11 +27,24 @@ void ChannelSetupCaches

::CreateChannel(struct Tegra::Control::ChannelState& c if (current_channel_id != UNSET_CHANNEL) { channel_state = &channel_storage[current_channel_id]; } + auto as_it = address_spaces.find(channel.memory_manager->GetID()); + if (as_it != address_spaces.end()) { + as_it->second.ref_count++; + return; + } + AddresSpaceRef new_gpu_mem_ref{ + .ref_count = 1, + .storage_id = address_spaces.size(), + .gpu_memory = channel.memory_manager.get(), + }; + address_spaces.emplace(channel.memory_manager->GetID(), new_gpu_mem_ref); + OnGPUASRegister(channel.memory_manager->GetID()); } /// Bind a channel for execution. template void ChannelSetupCaches

::BindToChannel(s32 id) { + std::unique_lock lk(config_mutex); auto it = channel_map.find(id); ASSERT(it != channel_map.end() && id >= 0); current_channel_id = it->second; @@ -39,11 +52,13 @@ void ChannelSetupCaches

::BindToChannel(s32 id) { maxwell3d = &channel_state->maxwell3d; kepler_compute = &channel_state->kepler_compute; gpu_memory = &channel_state->gpu_memory; + current_address_space = gpu_memory->GetID(); } /// Erase channel's channel_state. template void ChannelSetupCaches

::EraseChannel(s32 id) { + std::unique_lock lk(config_mutex); const auto it = channel_map.find(id); ASSERT(it != channel_map.end() && id >= 0); const auto this_id = it->second; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index fc68bcc73..d4c0dca78 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -16,9 +16,12 @@ namespace Tegra { +std::atomic MemoryManager::unique_identifier_generator{}; + MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 page_bits_) : system{system_}, address_space_bits{address_space_bits_}, page_bits{page_bits_}, entries{}, - page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits} { + page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits}, + unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} { address_space_size = 1ULL << address_space_bits; allocate_start = address_space_bits > 32 ? 1ULL << 32 : 0; page_size = 1ULL << page_bits; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index b8878476a..56604ef3e 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -26,6 +27,10 @@ public: u64 page_bits_ = 16); ~MemoryManager(); + size_t GetID() const { + return unique_identifier; + } + /// Binds a renderer to the memory manager. void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); @@ -140,6 +145,10 @@ private: void SetEntry(size_t position, EntryType entry); Common::MultiLevelPageTable page_table; + + const size_t unique_identifier; + + static std::atomic unique_identifier_generator; }; } // namespace Tegra diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp new file mode 100644 index 000000000..bc905a1a4 --- /dev/null +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -0,0 +1,16 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv3 or any later version +// Refer to the license.txt file included. + +#include "video_core/control/channel_state_cache.inc" +#include "video_core/texture_cache/texture_cache_base.h" + +namespace VideoCommon { + +TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept + : ChannelInfo(state), graphics_image_table{gpu_memory}, graphics_sampler_table{gpu_memory}, + compute_image_table{gpu_memory}, compute_sampler_table{gpu_memory} {} + +template class VideoCommon::ChannelSetupCaches; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0ec999d63..89c5faf88 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1,5 +1,7 @@ -// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: 2021 yuzu emulator team +// (https://github.com/skyline-emu/) +// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 +// or any later version Refer to the license.txt file included. #pragma once @@ -41,10 +43,6 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& // Setup channels current_channel_id = UNSET_CHANNEL; - state = nullptr; - maxwell3d = nullptr; - kepler_compute = nullptr; - gpu_memory = nullptr; // Make sure the first index is reserved for the null resources // This way the null resource becomes a compile time constant @@ -156,23 +154,24 @@ void TextureCache

::MarkModification(ImageId id) noexcept { template template void TextureCache

::FillGraphicsImageViews(std::span views) { - FillImageViews(state->graphics_image_table, state->graphics_image_view_ids, - views); + FillImageViews(channel_state->graphics_image_table, + channel_state->graphics_image_view_ids, views); } template void TextureCache

::FillComputeImageViews(std::span views) { - FillImageViews(state->compute_image_table, state->compute_image_view_ids, views); + FillImageViews(channel_state->compute_image_table, channel_state->compute_image_view_ids, + views); } template typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { - if (index > state->graphics_sampler_table.Limit()) { + if (index > channel_state->graphics_sampler_table.Limit()) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } - const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index); - SamplerId& id = state->graphics_sampler_ids[index]; + const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index); + SamplerId& id = channel_state->graphics_sampler_ids[index]; if (is_new) { id = FindSampler(descriptor); } @@ -181,12 +180,12 @@ typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { template typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { - if (index > state->compute_sampler_table.Limit()) { + if (index > channel_state->compute_sampler_table.Limit()) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } - const auto [descriptor, is_new] = state->compute_sampler_table.Read(index); - SamplerId& id = state->compute_sampler_ids[index]; + const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index); + SamplerId& id = channel_state->compute_sampler_ids[index]; if (is_new) { id = FindSampler(descriptor); } @@ -199,11 +198,12 @@ void TextureCache

::SynchronizeGraphicsDescriptors() { const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex; const u32 tic_limit = maxwell3d->regs.tic.limit; const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit; - if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) { - state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + if (channel_state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), + tsc_limit)) { + channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } - if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) { - state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + if (channel_state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) { + channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); } } @@ -213,11 +213,12 @@ void TextureCache

::SynchronizeComputeDescriptors() { const u32 tic_limit = kepler_compute->regs.tic.limit; const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address(); - if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { - state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + if (channel_state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { + channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } - if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) { - state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + if (channel_state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), + tic_limit)) { + channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); } } @@ -738,7 +739,7 @@ ImageViewId TextureCache

::FindImageView(const TICEntry& config) { if (!IsValidEntry(*gpu_memory, config)) { return NULL_IMAGE_VIEW_ID; } - const auto [pair, is_new] = state->image_views.try_emplace(config); + const auto [pair, is_new] = channel_state->image_views.try_emplace(config); ImageViewId& image_view_id = pair->second; if (is_new) { image_view_id = CreateImageView(config); @@ -1198,7 +1199,7 @@ SamplerId TextureCache

::FindSampler(const TSCEntry& config) { if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { return NULL_SAMPLER_ID; } - const auto [pair, is_new] = state->samplers.try_emplace(config); + const auto [pair, is_new] = channel_state->samplers.try_emplace(config); if (is_new) { pair->second = slot_samplers.insert(runtime, config); } @@ -1327,8 +1328,8 @@ void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu static constexpr bool BOOL_BREAK = std::is_same_v; boost::container::small_vector images; ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = state->gpu_page_table.find(page); - if (it == state->gpu_page_table.end()) { + const auto it = channel_state->gpu_page_table->find(page); + if (it == channel_state->gpu_page_table->end()) { if constexpr (BOOL_BREAK) { return false; } else { @@ -1454,8 +1455,9 @@ void TextureCache

::RegisterImage(ImageId image_id) { } image.lru_index = lru_cache.Insert(image_id, frame_tick); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); }); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { + (*channel_state->gpu_page_table)[page].push_back(image_id); + }); if (False(image.flags & ImageFlagBits::Sparse)) { auto map_id = slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); @@ -1486,9 +1488,9 @@ void TextureCache

::UnregisterImage(ImageId image_id) { image.flags &= ~ImageFlagBits::BadOverlap; lru_cache.Free(image.lru_index); const auto& clear_page_table = - [this, image_id]( - u64 page, - std::unordered_map, IdentityHash>& selected_page_table) { + [this, image_id](u64 page, + std::unordered_map, Common::IdentityHash>& + selected_page_table) { const auto page_it = selected_page_table.find(page); if (page_it == selected_page_table.end()) { ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << YUZU_PAGEBITS); @@ -1504,7 +1506,7 @@ void TextureCache

::UnregisterImage(ImageId image_id) { image_ids.erase(vector_it); }; ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { - clear_page_table(page, state->gpu_page_table); + clear_page_table(page, (*channel_state->gpu_page_table)); }); if (False(image.flags & ImageFlagBits::Sparse)) { const auto map_id = image.map_view_id; @@ -1701,11 +1703,11 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { template void TextureCache

::RemoveImageViewReferences(std::span removed_views) { - auto it = state->image_views.begin(); - while (it != state->image_views.end()) { + auto it = channel_state->image_views.begin(); + while (it != channel_state->image_views.end()) { const auto found = std::ranges::find(removed_views, it->second); if (found != removed_views.end()) { - it = state->image_views.erase(it); + it = channel_state->image_views.erase(it); } else { ++it; } @@ -1967,61 +1969,19 @@ bool TextureCache

::IsFullClear(ImageViewId id) { scissor.max_y >= size.height; } -template -TextureCache

::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept - : maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute}, - gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory}, - graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{ - gpu_memory} {} - template void TextureCache

::CreateChannel(struct Tegra::Control::ChannelState& channel) { - ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0); - auto new_id = [this, &channel]() { - if (!free_channel_ids.empty()) { - auto id = free_channel_ids.front(); - free_channel_ids.pop_front(); - new (&channel_storage[id]) ChannelInfo(channel); - return id; - } - channel_storage.emplace_back(channel); - return channel_storage.size() - 1; - }(); - channel_map.emplace(channel.bind_id, new_id); - if (current_channel_id != UNSET_CHANNEL) { - state = &channel_storage[current_channel_id]; - } + VideoCommon::ChannelSetupCaches::CreateChannel(channel); + const auto it = channel_map.find(channel.bind_id); + auto* this_state = &channel_storage[it->second]; + const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()]; + this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id]; } /// Bind a channel for execution. template -void TextureCache

::BindToChannel(s32 id) { - auto it = channel_map.find(id); - ASSERT(it != channel_map.end() && id >= 0); - current_channel_id = it->second; - state = &channel_storage[current_channel_id]; - maxwell3d = &state->maxwell3d; - kepler_compute = &state->kepler_compute; - gpu_memory = &state->gpu_memory; -} - -/// Erase channel's state. -template -void TextureCache

::EraseChannel(s32 id) { - const auto it = channel_map.find(id); - ASSERT(it != channel_map.end() && id >= 0); - const auto this_id = it->second; - free_channel_ids.push_back(this_id); - channel_map.erase(it); - if (this_id == current_channel_id) { - current_channel_id = UNSET_CHANNEL; - state = nullptr; - maxwell3d = nullptr; - kepler_compute = nullptr; - gpu_memory = nullptr; - } else if (current_channel_id != UNSET_CHANNEL) { - state = &channel_storage[current_channel_id]; - } +void TextureCache

::OnGPUASRegister([[maybe_unused]] size_t map_id) { + gpu_page_table_storage.emplace_back(); } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 69efcb718..b24968b03 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -1,5 +1,7 @@ -// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: 2021 yuzu emulator team +// (https://github.com/skyline-emu/) +// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 +// or any later version Refer to the license.txt file included. #pragma once @@ -13,9 +15,11 @@ #include #include "common/common_types.h" +#include "common/hash.h" #include "common/literals.h" #include "common/lru_cache.h" #include "video_core/compatible_formats.h" +#include "video_core/control/channel_state_cache.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/engines/fermi_2d.h" #include "video_core/surface.h" @@ -50,8 +54,35 @@ struct ImageViewInOut { ImageViewId id{}; }; +using TextureCacheGPUMap = std::unordered_map, Common::IdentityHash>; + +class TextureCacheChannelInfo : public ChannelInfo { +public: + TextureCacheChannelInfo() = delete; + TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept; + TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete; + TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete; + TextureCacheChannelInfo(TextureCacheChannelInfo&& other) noexcept = default; + TextureCacheChannelInfo& operator=(TextureCacheChannelInfo&& other) noexcept = default; + + DescriptorTable graphics_image_table{gpu_memory}; + DescriptorTable graphics_sampler_table{gpu_memory}; + std::vector graphics_sampler_ids; + std::vector graphics_image_view_ids; + + DescriptorTable compute_image_table{gpu_memory}; + DescriptorTable compute_sampler_table{gpu_memory}; + std::vector compute_sampler_ids; + std::vector compute_image_view_ids; + + std::unordered_map image_views; + std::unordered_map samplers; + + TextureCacheGPUMap* gpu_page_table; +}; + template -class TextureCache { +class TextureCache : public VideoCommon::ChannelSetupCaches { /// Address shift for caching images into a hash table static constexpr u64 YUZU_PAGEBITS = 20; @@ -85,13 +116,6 @@ class TextureCache { PixelFormat src_format; }; - template - struct IdentityHash { - [[nodiscard]] size_t operator()(T value) const noexcept { - return static_cast(value); - } - }; - public: explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&); @@ -179,13 +203,7 @@ public: [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; /// Create channel state. - void CreateChannel(struct Tegra::Control::ChannelState& channel); - - /// Bind a channel for execution. - void BindToChannel(s32 id); - - /// Erase channel's state. - void EraseChannel(s32 id); + void CreateChannel(Tegra::Control::ChannelState& channel) final override; std::mutex mutex; @@ -221,6 +239,8 @@ private: } } + void OnGPUASRegister(size_t map_id) final override; + /// Runs the Garbage Collector. void RunGarbageCollector(); @@ -355,51 +375,15 @@ private: Runtime& runtime; - struct ChannelInfo { - ChannelInfo() = delete; - ChannelInfo(struct Tegra::Control::ChannelState& state) noexcept; - ChannelInfo(const ChannelInfo& state) = delete; - ChannelInfo& operator=(const ChannelInfo&) = delete; - ChannelInfo(ChannelInfo&& other) noexcept = default; - ChannelInfo& operator=(ChannelInfo&& other) noexcept = default; - - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - - DescriptorTable graphics_image_table{gpu_memory}; - DescriptorTable graphics_sampler_table{gpu_memory}; - std::vector graphics_sampler_ids; - std::vector graphics_image_view_ids; - - DescriptorTable compute_image_table{gpu_memory}; - DescriptorTable compute_sampler_table{gpu_memory}; - std::vector compute_sampler_ids; - std::vector compute_image_view_ids; - - std::unordered_map image_views; - std::unordered_map samplers; - - std::unordered_map, IdentityHash> gpu_page_table; - }; - - std::deque channel_storage; - std::deque free_channel_ids; - std::unordered_map channel_map; - - ChannelInfo* state; - size_t current_channel_id{UNSET_CHANNEL}; VideoCore::RasterizerInterface& rasterizer; - Tegra::Engines::Maxwell3D* maxwell3d; - Tegra::Engines::KeplerCompute* kepler_compute; - Tegra::MemoryManager* gpu_memory; + std::deque gpu_page_table_storage; RenderTargets render_targets; std::unordered_map framebuffers; - std::unordered_map, IdentityHash> page_table; - std::unordered_map, IdentityHash> sparse_page_table; + std::unordered_map, Common::IdentityHash> page_table; + std::unordered_map, Common::IdentityHash> sparse_page_table; std::unordered_map> sparse_views; VAddr virtual_invalid_space{}; -- cgit v1.2.3 From 4d60410dd979fb688de7735d2b4b25a557bdeac7 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 5 Feb 2022 18:15:26 +0100 Subject: MemoryManager: initial multi paging system implementation. --- src/common/multi_level_page_table.inc | 3 + .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 45 ++- src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 1 + src/core/hle/service/nvdrv/devices/nvmap.cpp | 10 + src/video_core/memory_manager.cpp | 436 +++++++++++++-------- src/video_core/memory_manager.h | 57 ++- 6 files changed, 343 insertions(+), 209 deletions(-) (limited to 'src/common') diff --git a/src/common/multi_level_page_table.inc b/src/common/multi_level_page_table.inc index 7fbcb908a..9a68cad93 100644 --- a/src/common/multi_level_page_table.inc +++ b/src/common/multi_level_page_table.inc @@ -19,6 +19,9 @@ MultiLevelPageTable::MultiLevelPageTable(std::size_t address_space_bit std::size_t page_bits_) : address_space_bits{address_space_bits_}, first_level_bits{first_level_bits_}, page_bits{page_bits_} { + if (page_bits == 0) { + return; + } first_level_shift = address_space_bits - first_level_bits; first_level_chunk_size = (1ULL << (first_level_shift - page_bits)) * sizeof(BaseAddr); alloc_size = (1ULL << (address_space_bits - page_bits)) * sizeof(BaseAddr); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index db2a6c3b2..d95a88393 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -133,7 +133,8 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector& input, std::vector& const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits}; vm.big_page_allocator = std::make_unique(start_big_pages, end_big_pages); - gmmu = std::make_shared(system, 40, VM::PAGE_SIZE_BITS); + gmmu = std::make_shared(system, 40, vm.big_page_size_bits, + VM::PAGE_SIZE_BITS); system.GPU().InitAddressSpace(*gmmu); vm.initialised = true; @@ -189,6 +190,7 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector& input, std::vector< .size = size, .page_size = params.page_size, .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None, + .big_pages = params.page_size != VM::YUZU_PAGESIZE, }; std::memcpy(output.data(), ¶ms, output.size()); @@ -209,7 +211,7 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) { // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state // Only FreeSpace can unmap them fully if (mapping->sparse_alloc) - gmmu->MapSparse(offset, mapping->size); + gmmu->MapSparse(offset, mapping->size, mapping->big_page); else gmmu->Unmap(offset, mapping->size); @@ -294,8 +296,9 @@ NvResult nvhost_as_gpu::Remap(const std::vector& input, std::vector& out return NvResult::BadValue; } + const bool use_big_pages = alloc->second.big_pages; if (!entry.handle) { - gmmu->MapSparse(virtual_address, size); + gmmu->MapSparse(virtual_address, size, use_big_pages); } else { auto handle{nvmap.GetHandle(entry.handle)}; if (!handle) { @@ -306,7 +309,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector& input, std::vector& out handle->address + (static_cast(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; - gmmu->Map(virtual_address, cpu_address, size); + gmmu->Map(virtual_address, cpu_address, size, use_big_pages); } } @@ -345,7 +348,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector& input, std::vector(params.offset + params.buffer_offset)}; VAddr cpu_address{mapping->ptr + params.buffer_offset}; - gmmu->Map(gpu_address, cpu_address, params.mapping_size); + gmmu->Map(gpu_address, cpu_address, params.mapping_size, mapping->big_page); return NvResult::Success; } catch ([[maybe_unused]] const std::out_of_range& e) { @@ -363,6 +366,17 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector& input, std::vector(handle->address + params.buffer_offset)}; u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; + bool big_page{[&]() { + if (Common::IsAligned(handle->align, vm.big_page_size)) + return true; + else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE)) + return false; + else { + UNREACHABLE(); + return false; + } + }()}; + if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) { auto alloc{allocation_map.upper_bound(params.offset)}; @@ -372,23 +386,14 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector& input, std::vectorMap(params.offset, cpu_address, size); + const bool use_big_pages = alloc->second.big_pages && big_page; + gmmu->Map(params.offset, cpu_address, size, use_big_pages); - auto mapping{std::make_shared(cpu_address, params.offset, size, true, false, - alloc->second.sparse)}; + auto mapping{std::make_shared(cpu_address, params.offset, size, true, + use_big_pages, alloc->second.sparse)}; alloc->second.mappings.push_back(mapping); mapping_map[params.offset] = mapping; } else { - bool big_page{[&]() { - if (Common::IsAligned(handle->align, vm.big_page_size)) - return true; - else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE)) - return false; - else { - UNREACHABLE(); - return false; - } - }()}; auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; @@ -402,7 +407,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector& input, std::vectorMap(params.offset, cpu_address, size); + gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), big_page); auto mapping{ std::make_shared(cpu_address, params.offset, size, false, big_page, false)}; @@ -439,7 +444,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector& input, std::vectorsparse_alloc) { - gmmu->MapSparse(params.offset, mapping->size); + gmmu->MapSparse(params.offset, mapping->size, mapping->big_page); } else { gmmu->Unmap(params.offset, mapping->size); } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index 1d27739e2..12e881f0d 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h @@ -177,6 +177,7 @@ private: std::list> mappings; u32 page_size; bool sparse; + bool big_pages; }; std::map> diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 279997e81..992c117f1 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp @@ -9,6 +9,8 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/hle/kernel/k_page_table.h" +#include "core/hle/kernel/k_process.h" #include "core/hle/service/nvdrv/core/container.h" #include "core/hle/service/nvdrv/core/nvmap.h" #include "core/hle/service/nvdrv/devices/nvmap.h" @@ -136,6 +138,10 @@ NvResult nvmap::IocAlloc(const std::vector& input, std::vector& output) LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle); return result; } + ASSERT(system.CurrentProcess() + ->PageTable() + .LockForDeviceAddressSpace(handle_description->address, handle_description->size) + .IsSuccess()); std::memcpy(output.data(), ¶ms, sizeof(params)); return result; } @@ -256,6 +262,10 @@ NvResult nvmap::IocFree(const std::vector& input, std::vector& output) { } if (auto freeInfo{file.FreeHandle(params.handle, false)}) { + ASSERT(system.CurrentProcess() + ->PageTable() + .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size) + .IsSuccess()); params.address = freeInfo->address; params.size = static_cast(freeInfo->size); params.flags.raw = 0; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index b36067613..836ece136 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/device_memory.h" #include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_process.h" #include "core/memory.h" @@ -14,40 +15,69 @@ #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" +#pragma optimize("", off) + namespace Tegra { std::atomic MemoryManager::unique_identifier_generator{}; -MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 page_bits_) - : system{system_}, address_space_bits{address_space_bits_}, page_bits{page_bits_}, entries{}, - page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits}, +MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, + u64 page_bits_) + : system{system_}, memory{system.Memory()}, device_memory{system.DeviceMemory()}, + address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, + entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, + page_bits != big_page_bits ? page_bits : 0}, unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} { address_space_size = 1ULL << address_space_bits; - allocate_start = address_space_bits > 32 ? 1ULL << 32 : 0; page_size = 1ULL << page_bits; page_mask = page_size - 1ULL; - const u64 page_table_bits = address_space_bits - cpu_page_bits; + big_page_size = 1ULL << big_page_bits; + big_page_mask = big_page_size - 1ULL; + const u64 page_table_bits = address_space_bits - page_bits; + const u64 big_page_table_bits = address_space_bits - big_page_bits; const u64 page_table_size = 1ULL << page_table_bits; + const u64 big_page_table_size = 1ULL << big_page_table_bits; page_table_mask = page_table_size - 1; + big_page_table_mask = big_page_table_size - 1; + big_entries.resize(big_page_table_size / 32, 0); + big_page_table_cpu.resize(big_page_table_size); + big_page_table_physical.resize(big_page_table_size); entries.resize(page_table_size / 32, 0); } MemoryManager::~MemoryManager() = default; +template MemoryManager::EntryType MemoryManager::GetEntry(size_t position) const { - position = position >> page_bits; - const u64 entry_mask = entries[position / 32]; - const size_t sub_index = position % 32; - return static_cast((entry_mask >> (2 * sub_index)) & 0x03ULL); + if constexpr (is_big_page) { + position = position >> big_page_bits; + const u64 entry_mask = big_entries[position / 32]; + const size_t sub_index = position % 32; + return static_cast((entry_mask >> (2 * sub_index)) & 0x03ULL); + } else { + position = position >> page_bits; + const u64 entry_mask = entries[position / 32]; + const size_t sub_index = position % 32; + return static_cast((entry_mask >> (2 * sub_index)) & 0x03ULL); + } } +template void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) { - position = position >> page_bits; - const u64 entry_mask = entries[position / 32]; - const size_t sub_index = position % 32; - entries[position / 32] = - (~(3ULL << sub_index * 2) & entry_mask) | (static_cast(entry) << sub_index * 2); + if constexpr (is_big_page) { + position = position >> big_page_bits; + const u64 entry_mask = big_entries[position / 32]; + const size_t sub_index = position % 32; + big_entries[position / 32] = + (~(3ULL << sub_index * 2) & entry_mask) | (static_cast(entry) << sub_index * 2); + } else { + position = position >> page_bits; + const u64 entry_mask = entries[position / 32]; + const size_t sub_index = position % 32; + entries[position / 32] = + (~(3ULL << sub_index * 2) & entry_mask) | (static_cast(entry) << sub_index * 2); + } } template @@ -59,48 +89,66 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp } for (u64 offset{}; offset < size; offset += page_size) { const GPUVAddr current_gpu_addr = gpu_addr + offset; - [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr); - SetEntry(current_gpu_addr, entry_type); + [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr); + SetEntry(current_gpu_addr, entry_type); if (current_entry_type != entry_type) { rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size); } if constexpr (entry_type == EntryType::Mapped) { const VAddr current_cpu_addr = cpu_addr + offset; - const auto index = PageEntryIndex(current_gpu_addr); - const u32 sub_value = static_cast(current_cpu_addr >> 12ULL); - if (current_entry_type == entry_type && sub_value != page_table[index]) { - rasterizer->InvalidateRegion(static_cast(page_table[index]) << 12ULL, - page_size); - } - page_table[index] = static_cast(current_cpu_addr >> 12ULL); + const auto index = PageEntryIndex(current_gpu_addr); + const u32 sub_value = static_cast(current_cpu_addr >> cpu_page_bits); + page_table[index] = sub_value; } remaining_size -= page_size; } return gpu_addr; } +template +GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, + size_t size) { + u64 remaining_size{size}; + for (u64 offset{}; offset < size; offset += big_page_size) { + const GPUVAddr current_gpu_addr = gpu_addr + offset; + [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr); + SetEntry(current_gpu_addr, entry_type); + if (current_entry_type != entry_type) { + rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size); + } + if constexpr (entry_type == EntryType::Mapped) { + const VAddr current_cpu_addr = cpu_addr + offset; + const auto index = PageEntryIndex(current_gpu_addr); + const u32 sub_value = static_cast(current_cpu_addr >> cpu_page_bits); + big_page_table_cpu[index] = sub_value; + const PAddr phys_address = + device_memory.GetPhysicalAddr(memory.GetPointer(current_cpu_addr)); + big_page_table_physical[index] = static_cast(phys_address); + } + remaining_size -= big_page_size; + } + return gpu_addr; +} + void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { rasterizer = rasterizer_; } -GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { +GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, + bool is_big_pages) { + if (is_big_pages) [[likely]] { + return BigPageTableOp(gpu_addr, cpu_addr, size); + } return PageTableOp(gpu_addr, cpu_addr, size); } -GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size) { +GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) { + if (is_big_pages) [[likely]] { + return BigPageTableOp(gpu_addr, 0, size); + } return PageTableOp(gpu_addr, 0, size); } -GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) { - return Map(*FindFreeRange(size, align), cpu_addr, size); -} - -GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) { - const std::optional gpu_addr = FindFreeRange(size, 1, true); - ASSERT(gpu_addr); - return Map(*gpu_addr, cpu_addr, size); -} - void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { if (size == 0) { return; @@ -115,61 +163,24 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { rasterizer->UnmapMemory(*cpu_addr, map_size); } + BigPageTableOp(gpu_addr, 0, size); PageTableOp(gpu_addr, 0, size); } -std::optional MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) { - for (u64 offset{}; offset < size; offset += page_size) { - if (GetEntry(gpu_addr + offset) != EntryType::Free) { +std::optional MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { + if (GetEntry(gpu_addr) != EntryType::Mapped) [[unlikely]] { + if (GetEntry(gpu_addr) != EntryType::Mapped) { return std::nullopt; } - } - return PageTableOp(gpu_addr, 0, size); -} - -GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) { - return *AllocateFixed(*FindFreeRange(size, align), size); -} - -std::optional MemoryManager::FindFreeRange(std::size_t size, std::size_t align, - bool start_32bit_address) const { - if (!align) { - align = page_size; - } else { - align = Common::AlignUp(align, page_size); - } - - u64 available_size{}; - GPUVAddr gpu_addr{start_32bit_address ? 0 : allocate_start}; - while (gpu_addr + available_size < address_space_size) { - if (GetEntry(gpu_addr + available_size) == EntryType::Free) { - available_size += page_size; - - if (available_size >= size) { - return gpu_addr; - } - } else { - gpu_addr += available_size + page_size; - available_size = 0; - - const auto remainder{gpu_addr % align}; - if (remainder) { - gpu_addr = (gpu_addr - remainder) + align; - } - } - } - - return std::nullopt; -} - -std::optional MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { - if (GetEntry(gpu_addr) != EntryType::Mapped) { - return std::nullopt; + const VAddr cpu_addr_base = static_cast(page_table[PageEntryIndex(gpu_addr)]) + << cpu_page_bits; + return cpu_addr_base + (gpu_addr & page_mask); } - const VAddr cpu_addr_base = static_cast(page_table[PageEntryIndex(gpu_addr)]) << 12ULL; - return cpu_addr_base + (gpu_addr & page_mask); + const VAddr cpu_addr_base = + static_cast(big_page_table_cpu[PageEntryIndex(gpu_addr)]) << cpu_page_bits; + return cpu_addr_base + (gpu_addr & big_page_mask); } std::optional MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { @@ -225,7 +236,7 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) { return {}; } - return system.Memory().GetPointer(*address); + return memory.GetPointer(*address); } const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { @@ -234,98 +245,161 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { return {}; } - return system.Memory().GetPointer(*address); + return memory.GetPointer(*address); } -void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, - bool is_safe) const { +#pragma inline_recursion(on) + +template +inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, + FuncMapped&& func_mapped, FuncReserved&& func_reserved, + FuncUnmapped&& func_unmapped) const { + u64 used_page_size; + u64 used_page_mask; + u64 used_page_bits; + if constexpr (is_big_pages) { + used_page_size = big_page_size; + used_page_mask = big_page_mask; + used_page_bits = big_page_bits; + } else { + used_page_size = page_size; + used_page_mask = page_mask; + used_page_bits = page_bits; + } std::size_t remaining_size{size}; - std::size_t page_index{gpu_src_addr >> page_bits}; - std::size_t page_offset{gpu_src_addr & page_mask}; + std::size_t page_index{gpu_src_addr >> used_page_bits}; + std::size_t page_offset{gpu_src_addr & used_page_mask}; + GPUVAddr current_address = gpu_src_addr; while (remaining_size > 0) { const std::size_t copy_amount{ - std::min(static_cast(page_size) - page_offset, remaining_size)}; - const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; - if (page_addr) { - const auto src_addr{*page_addr + page_offset}; - if (is_safe) { - // Flush must happen on the rasterizer interface, such that memory is always - // synchronous when it is read (even when in asynchronous GPU mode). - // Fixes Dead Cells title menu. - rasterizer->FlushRegion(src_addr, copy_amount); - } - system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); - } else { - std::memset(dest_buffer, 0, copy_amount); + std::min(static_cast(used_page_size) - page_offset, remaining_size)}; + auto entry = GetEntry(current_address); + if (entry == EntryType::Mapped) [[likely]] { + func_mapped(page_index, page_offset, copy_amount); + } else if (entry == EntryType::Reserved) { + func_reserved(page_index, page_offset, copy_amount); + } else [[unlikely]] { + func_unmapped(page_index, page_offset, copy_amount); } - page_index++; page_offset = 0; - dest_buffer = static_cast(dest_buffer) + copy_amount; remaining_size -= copy_amount; + current_address += copy_amount; } } +template +void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, + std::size_t size) const { + auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, + [[maybe_unused]] std::size_t offset, std::size_t copy_amount) { + std::memset(dest_buffer, 0, copy_amount); + dest_buffer = static_cast(dest_buffer) + copy_amount; + }; + auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(page_table[page_index]) << cpu_page_bits) + offset; + if constexpr (is_safe) { + rasterizer->FlushRegion(cpu_addr_base, copy_amount); + } + memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); + dest_buffer = static_cast(dest_buffer) + copy_amount; + }; + auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; + if constexpr (is_safe) { + rasterizer->FlushRegion(cpu_addr_base, copy_amount); + } + memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); + // u8* physical = device_memory.GetPointer(big_page_table_physical[page_index] + offset); + // std::memcpy(dest_buffer, physical, copy_amount); + dest_buffer = static_cast(dest_buffer) + copy_amount; + }; + auto read_short_pages = [&](std::size_t page_index, std::size_t offset, + std::size_t copy_amount) { + GPUVAddr base = (page_index << big_page_bits) + offset; + MemoryOperation(base, copy_amount, mapped_normal, set_to_zero, set_to_zero); + }; + MemoryOperation(gpu_src_addr, size, mapped_big, set_to_zero, read_short_pages); +} + void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const { - ReadBlockImpl(gpu_src_addr, dest_buffer, size, true); + ReadBlockImpl(gpu_src_addr, dest_buffer, size); } void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, const std::size_t size) const { - ReadBlockImpl(gpu_src_addr, dest_buffer, size, false); + ReadBlockImpl(gpu_src_addr, dest_buffer, size); } -void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, - bool is_safe) { - std::size_t remaining_size{size}; - std::size_t page_index{gpu_dest_addr >> page_bits}; - std::size_t page_offset{gpu_dest_addr & page_mask}; - - while (remaining_size > 0) { - const std::size_t copy_amount{ - std::min(static_cast(page_size) - page_offset, remaining_size)}; - const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; - if (page_addr) { - const auto dest_addr{*page_addr + page_offset}; - - if (is_safe) { - // Invalidate must happen on the rasterizer interface, such that memory is always - // synchronous when it is written (even when in asynchronous GPU mode). - rasterizer->InvalidateRegion(dest_addr, copy_amount); - } - system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); +template +void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, + std::size_t size) { + auto just_advance = [&]([[maybe_unused]] std::size_t page_index, + [[maybe_unused]] std::size_t offset, std::size_t copy_amount) { + src_buffer = static_cast(src_buffer) + copy_amount; + }; + auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(page_table[page_index]) << cpu_page_bits) + offset; + if constexpr (is_safe) { + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); } - - page_index++; - page_offset = 0; + memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount); src_buffer = static_cast(src_buffer) + copy_amount; - remaining_size -= copy_amount; - } + }; + auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; + if constexpr (is_safe) { + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + } + memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount); + /*u8* physical = + device_memory.GetPointer(big_page_table_physical[page_index] << cpu_page_bits) + offset; + std::memcpy(physical, src_buffer, copy_amount);*/ + src_buffer = static_cast(src_buffer) + copy_amount; + }; + auto write_short_pages = [&](std::size_t page_index, std::size_t offset, + std::size_t copy_amount) { + GPUVAddr base = (page_index << big_page_bits) + offset; + MemoryOperation(base, copy_amount, mapped_normal, just_advance, just_advance); + }; + MemoryOperation(gpu_dest_addr, size, mapped_big, just_advance, write_short_pages); } void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) { - WriteBlockImpl(gpu_dest_addr, src_buffer, size, true); + WriteBlockImpl(gpu_dest_addr, src_buffer, size); } void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) { - WriteBlockImpl(gpu_dest_addr, src_buffer, size, false); + WriteBlockImpl(gpu_dest_addr, src_buffer, size); } void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { - size_t remaining_size{size}; - size_t page_index{gpu_addr >> page_bits}; - size_t page_offset{gpu_addr & page_mask}; - while (remaining_size > 0) { - const size_t num_bytes{std::min(page_size - page_offset, remaining_size)}; - if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) { - rasterizer->FlushRegion(*page_addr + page_offset, num_bytes); - } - ++page_index; - page_offset = 0; - remaining_size -= num_bytes; - } + auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, + [[maybe_unused]] std::size_t offset, + [[maybe_unused]] std::size_t copy_amount) {}; + + auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(page_table[page_index]) << cpu_page_bits) + offset; + rasterizer->FlushRegion(cpu_addr_base, copy_amount); + }; + auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; + rasterizer->FlushRegion(cpu_addr_base, copy_amount); + }; + auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, + std::size_t copy_amount) { + GPUVAddr base = (page_index << big_page_bits) + offset; + MemoryOperation(base, copy_amount, mapped_normal, do_nothing, do_nothing); + }; + MemoryOperation(gpu_addr, size, mapped_big, do_nothing, flush_short_pages); } void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) { @@ -348,7 +422,7 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { } bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const { - size_t page_index{gpu_addr >> page_bits}; + size_t page_index{gpu_addr >> big_page_bits}; const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; std::optional old_page_addr{}; while (page_index != page_last) { @@ -371,7 +445,7 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons size_t page_index{gpu_addr >> page_bits}; const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; while (page_index < page_last) { - if (GetEntry(page_index << page_bits) == EntryType::Free) { + if (GetEntry(page_index << page_bits) == EntryType::Free) { return false; } ++page_index; @@ -379,47 +453,63 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons return true; } +#pragma inline_recursion(on) + std::vector> MemoryManager::GetSubmappedRange( GPUVAddr gpu_addr, std::size_t size) const { std::vector> result{}; - size_t page_index{gpu_addr >> page_bits}; - size_t remaining_size{size}; - size_t page_offset{gpu_addr & page_mask}; std::optional> last_segment{}; std::optional old_page_addr{}; - const auto extend_size = [this, &last_segment, &page_index, &page_offset](std::size_t bytes) { - if (!last_segment) { - const GPUVAddr new_base_addr = (page_index << page_bits) + page_offset; - last_segment = {new_base_addr, bytes}; - } else { - last_segment->second += bytes; - } - }; - const auto split = [&last_segment, &result] { + const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, + [[maybe_unused]] std::size_t offset, + [[maybe_unused]] std::size_t copy_amount) { if (last_segment) { result.push_back(*last_segment); last_segment = std::nullopt; } }; - while (remaining_size > 0) { - const size_t num_bytes{std::min(page_size - page_offset, remaining_size)}; - const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; - if (!page_addr || *page_addr == 0) { - split(); - } else if (old_page_addr) { - if (*old_page_addr + page_size != *page_addr) { - split(); + const auto extend_size_big = [this, &split, &old_page_addr, + &last_segment](std::size_t page_index, std::size_t offset, + std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; + if (old_page_addr) { + if (*old_page_addr != cpu_addr_base) { + split(0, 0, 0); } - extend_size(num_bytes); + } + old_page_addr = {cpu_addr_base + copy_amount}; + if (!last_segment) { + const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; + last_segment = {new_base_addr, copy_amount}; } else { - extend_size(num_bytes); + last_segment->second += copy_amount; } - ++page_index; - page_offset = 0; - remaining_size -= num_bytes; - old_page_addr = page_addr; - } - split(); + }; + const auto extend_size_short = [this, &split, &old_page_addr, + &last_segment](std::size_t page_index, std::size_t offset, + std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(page_table[page_index]) << cpu_page_bits) + offset; + if (old_page_addr) { + if (*old_page_addr != cpu_addr_base) { + split(0, 0, 0); + } + } + old_page_addr = {cpu_addr_base + copy_amount}; + if (!last_segment) { + const GPUVAddr new_base_addr = (page_index << page_bits) + offset; + last_segment = {new_base_addr, copy_amount}; + } else { + last_segment->second += copy_amount; + } + }; + auto do_short_pages = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + GPUVAddr base = (page_index << big_page_bits) + offset; + MemoryOperation(base, copy_amount, extend_size_short, split, split); + }; + MemoryOperation(gpu_addr, size, extend_size_big, split, do_short_pages); + split(0, 0, 0); return result; } diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 56604ef3e..9c388a06e 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -10,21 +10,26 @@ #include "common/common_types.h" #include "common/multi_level_page_table.h" +#include "common/virtual_buffer.h" namespace VideoCore { class RasterizerInterface; } namespace Core { +class DeviceMemory; +namespace Memory { +class Memory; +} // namespace Memory class System; -} +} // namespace Core namespace Tegra { class MemoryManager final { public: explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40, - u64 page_bits_ = 16); + u64 big_page_bits_ = 16, u64 page_bits_ = 12); ~MemoryManager(); size_t GetID() const { @@ -93,12 +98,8 @@ public: std::vector> GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const; - GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size); - GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size); - [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); - [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); - [[nodiscard]] std::optional AllocateFixed(GPUVAddr gpu_addr, std::size_t size); - [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align); + GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, bool is_big_pages = true); + GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); void Unmap(GPUVAddr gpu_addr, std::size_t size); void FlushRegion(GPUVAddr gpu_addr, size_t size) const; @@ -107,26 +108,42 @@ private: [[nodiscard]] std::optional FindFreeRange(std::size_t size, std::size_t align, bool start_32bit_address = false) const; - void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, - bool is_safe) const; - void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, - bool is_safe); + template + inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, + FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const; + + template + void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; + + template + void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); + template [[nodiscard]] inline std::size_t PageEntryIndex(GPUVAddr gpu_addr) const { - return (gpu_addr >> page_bits) & page_table_mask; + if constexpr (is_big_page) { + return (gpu_addr >> big_page_bits) & big_page_table_mask; + } else { + return (gpu_addr >> page_bits) & page_table_mask; + } } Core::System& system; + Core::Memory::Memory& memory; + Core::DeviceMemory& device_memory; const u64 address_space_bits; const u64 page_bits; u64 address_space_size; - u64 allocate_start; u64 page_size; u64 page_mask; u64 page_table_mask; static constexpr u64 cpu_page_bits{12}; + const u64 big_page_bits; + u64 big_page_size; + u64 big_page_mask; + u64 big_page_table_mask; + VideoCore::RasterizerInterface* rasterizer = nullptr; enum class EntryType : u64 { @@ -136,15 +153,23 @@ private: }; std::vector entries; + std::vector big_entries; template GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size); - EntryType GetEntry(size_t position) const; + template + GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size); + + template + inline EntryType GetEntry(size_t position) const; - void SetEntry(size_t position, EntryType entry); + template + inline void SetEntry(size_t position, EntryType entry); Common::MultiLevelPageTable page_table; + Common::VirtualBuffer big_page_table_cpu; + Common::VirtualBuffer big_page_table_physical; const size_t unique_identifier; -- cgit v1.2.3 From f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 14 Aug 2022 02:36:36 -0700 Subject: DMA & InlineToMemory Engines Rework. --- src/common/algorithm.h | 8 + src/video_core/buffer_cache/buffer_cache.h | 4 +- src/video_core/engines/engine_upload.cpp | 46 ++++- src/video_core/engines/engine_upload.h | 6 +- src/video_core/engines/kepler_compute.cpp | 13 +- src/video_core/engines/kepler_memory.cpp | 13 +- src/video_core/engines/maxwell_3d.cpp | 5 +- src/video_core/engines/maxwell_dma.cpp | 91 ++++++--- src/video_core/engines/maxwell_dma.h | 6 + src/video_core/host1x/vic.cpp | 5 +- src/video_core/memory_manager.cpp | 91 +++++++++ src/video_core/memory_manager.h | 6 + src/video_core/rasterizer_interface.h | 2 +- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 2 - src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.h | 2 +- src/video_core/texture_cache/util.cpp | 1 - src/video_core/textures/decoders.cpp | 225 +++++++-------------- src/video_core/textures/decoders.h | 33 +-- 21 files changed, 323 insertions(+), 242 deletions(-) (limited to 'src/common') diff --git a/src/common/algorithm.h b/src/common/algorithm.h index 9ddfd637b..055dca142 100644 --- a/src/common/algorithm.h +++ b/src/common/algorithm.h @@ -24,4 +24,12 @@ template > return first != last && !comp(value, *first) ? first : last; } +template +T FoldRight(T initial_value, Func&& func, Args&&... args) { + T value{initial_value}; + const auto high_func = [&value, &func](T x) { value = func(value, x); }; + (std::invoke(high_func, std::forward(args)), ...); + return value; +} + } // namespace Common diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e55cac0d6..359c11d6f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -126,7 +126,7 @@ public: void DownloadMemory(VAddr cpu_addr, u64 size); - bool InlineMemory(VAddr dest_address, size_t copy_size, std::span inlined_buffer); + bool InlineMemory(VAddr dest_address, size_t copy_size, std::span inlined_buffer); void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); @@ -1685,7 +1685,7 @@ void BufferCache

::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, template bool BufferCache

::InlineMemory(VAddr dest_address, size_t copy_size, - std::span inlined_buffer) { + std::span inlined_buffer) { const bool is_dirty = IsRegionRegistered(dest_address, copy_size); if (!is_dirty) { return false; diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 6ff5b1eca..a34819234 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -3,6 +3,7 @@ #include +#include "common/algorithm.h" #include "common/assert.h" #include "video_core/engines/engine_upload.h" #include "video_core/memory_manager.h" @@ -34,21 +35,48 @@ void State::ProcessData(const u32 data, const bool is_last_call) { if (!is_last_call) { return; } + ProcessData(inner_buffer); +} + +void State::ProcessData(const u32* data, size_t num_data) { + std::span read_buffer(reinterpret_cast(data), num_data * sizeof(u32)); + ProcessData(read_buffer); +} + +void State::ProcessData(std::span read_buffer) { const GPUVAddr address{regs.dest.Address()}; if (is_linear) { - rasterizer->AccelerateInlineToMemory(address, copy_size, inner_buffer); + if (regs.line_count == 1) { + rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer); + } else { + for (u32 line = 0; line < regs.line_count; ++line) { + const GPUVAddr dest_line = address + static_cast(line) * regs.dest.pitch; + memory_manager.WriteBlockUnsafe( + dest_line, read_buffer.data() + static_cast(line) * regs.line_length_in, + regs.line_length_in); + } + memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count); + } } else { - UNIMPLEMENTED_IF(regs.dest.z != 0); - UNIMPLEMENTED_IF(regs.dest.depth != 1); - UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0); - UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0); + u32 width = regs.dest.width; + u32 x_elements = regs.line_length_in; + u32 x_offset = regs.dest.x; + const u32 bpp_shift = Common::FoldRight( + 4U, [](u32 x, u32 y) { return std::min(x, static_cast(std::countr_zero(y))); }, + width, x_elements, x_offset, static_cast(address)); + width >>= bpp_shift; + x_elements >>= bpp_shift; + x_offset >>= bpp_shift; + const u32 bytes_per_pixel = 1U << bpp_shift; const std::size_t dst_size = Tegra::Texture::CalculateSize( - true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0); + true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, + regs.dest.BlockHeight(), regs.dest.BlockDepth()); tmp_buffer.resize(dst_size); memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); - Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, - regs.dest.BlockHeight(), copy_size, inner_buffer.data(), - tmp_buffer.data()); + Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width, + regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, + x_elements, regs.line_count, regs.dest.BlockHeight(), + regs.dest.BlockDepth(), regs.line_length_in); memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); } } diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 94ff3314a..f08f6e36a 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include "common/bit_field.h" #include "common/common_types.h" @@ -33,7 +34,7 @@ struct Registers { u32 width; u32 height; u32 depth; - u32 z; + u32 layer; u32 x; u32 y; @@ -62,11 +63,14 @@ public: void ProcessExec(bool is_linear_); void ProcessData(u32 data, bool is_last_call); + void ProcessData(const u32* data, size_t num_data); /// Binds a rasterizer to this engine. void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); private: + void ProcessData(std::span read_buffer); + u32 write_offset = 0; u32 copy_size = 0; std::vector inner_buffer; diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 5db254d94..7c50bdbe0 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -36,8 +36,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal } case KEPLER_COMPUTE_REG_INDEX(data_upload): { upload_state.ProcessData(method_argument, is_last_call); - if (is_last_call) { - } break; } case KEPLER_COMPUTE_REG_INDEX(launch): @@ -50,8 +48,15 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { - for (std::size_t i = 0; i < amount; i++) { - CallMethod(method, base_start[i], methods_pending - static_cast(i) <= 1); + switch (method) { + case KEPLER_COMPUTE_REG_INDEX(data_upload): + upload_state.ProcessData(base_start, static_cast(amount)); + return; + default: + for (std::size_t i = 0; i < amount; i++) { + CallMethod(method, base_start[i], methods_pending - static_cast(i) <= 1); + } + break; } } diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index e2b029542..a3fbab1e5 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -33,8 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call } case KEPLERMEMORY_REG_INDEX(data): { upload_state.ProcessData(method_argument, is_last_call); - if (is_last_call) { - } break; } } @@ -42,8 +40,15 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { - for (std::size_t i = 0; i < amount; i++) { - CallMethod(method, base_start[i], methods_pending - static_cast(i) <= 1); + switch (method) { + case KEPLERMEMORY_REG_INDEX(data): + upload_state.ProcessData(base_start, static_cast(amount)); + return; + default: + for (std::size_t i = 0; i < amount; i++) { + CallMethod(method, base_start[i], methods_pending - static_cast(i) <= 1); + } + break; } } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index add1ccebe..632052c53 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -239,8 +239,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume return upload_state.ProcessExec(regs.exec_upload.linear != 0); case MAXWELL3D_REG_INDEX(data_upload): upload_state.ProcessData(argument, is_last_call); - if (is_last_call) { - } return; case MAXWELL3D_REG_INDEX(fragment_barrier): return rasterizer->FragmentBarrier(); @@ -316,6 +314,9 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15: ProcessCBMultiData(base_start, amount); break; + case MAXWELL3D_REG_INDEX(data_upload): + upload_state.ProcessData(base_start, static_cast(amount)); + return; default: for (std::size_t i = 0; i < amount; i++) { CallMethod(method, base_start[i], methods_pending - static_cast(i) <= 1); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 0efe58282..a12a95ce2 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/algorithm.h" #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" @@ -54,8 +55,6 @@ void MaxwellDMA::Launch() { const LaunchDMA& launch = regs.launch_dma; ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); - ASSERT(regs.dst_params.origin.x == 0); - ASSERT(regs.dst_params.origin.y == 0); const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; @@ -121,12 +120,13 @@ void MaxwellDMA::CopyPitchToPitch() { void MaxwellDMA::CopyBlockLinearToPitch() { UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); - UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); UNIMPLEMENTED_IF(regs.src_params.layer != 0); + const bool is_remapping = regs.launch_dma.remap_enable != 0; + // Optimized path for micro copies. const size_t dst_size = static_cast(regs.pitch_out) * regs.line_count; - if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && + if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && regs.src_params.height > GOB_SIZE_Y) { FastCopyBlockLinearToPitch(); return; @@ -134,10 +134,27 @@ void MaxwellDMA::CopyBlockLinearToPitch() { // Deswizzle the input and copy it over. UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); - const u32 bytes_per_pixel = - regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1; const Parameters& src_params = regs.src_params; - const u32 width = src_params.width; + + const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; + const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; + + const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; + + u32 width = src_params.width; + u32 x_elements = regs.line_length_in; + u32 x_offset = src_params.origin.x; + u32 bpp_shift = 0U; + if (!is_remapping) { + bpp_shift = Common::FoldRight( + 4U, [](u32 x, u32 y) { return std::min(x, static_cast(std::countr_zero(y))); }, + width, x_elements, x_offset, static_cast(regs.offset_in)); + width >>= bpp_shift; + x_elements >>= bpp_shift; + x_offset >>= bpp_shift; + } + + const u32 bytes_per_pixel = base_bpp << bpp_shift; const u32 height = src_params.height; const u32 depth = src_params.depth; const u32 block_height = src_params.block_size.height; @@ -155,30 +172,46 @@ void MaxwellDMA::CopyBlockLinearToPitch() { memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); - UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel, - block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(), - read_buffer.data()); + UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, + src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, + regs.pitch_out); memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::CopyPitchToBlockLinear() { UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); + UNIMPLEMENTED_IF(regs.dst_params.layer != 0); UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); + const bool is_remapping = regs.launch_dma.remap_enable != 0; + const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; + const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; + const auto& dst_params = regs.dst_params; - const u32 bytes_per_pixel = - regs.launch_dma.remap_enable ? regs.pitch_in / regs.line_length_in : 1; - const u32 width = dst_params.width; + + const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; + + u32 width = dst_params.width; + u32 x_elements = regs.line_length_in; + u32 x_offset = dst_params.origin.x; + u32 bpp_shift = 0U; + if (!is_remapping) { + bpp_shift = Common::FoldRight( + 4U, [](u32 x, u32 y) { return std::min(x, static_cast(std::countr_zero(y))); }, + width, x_elements, x_offset, static_cast(regs.offset_out)); + width >>= bpp_shift; + x_elements >>= bpp_shift; + x_offset >>= bpp_shift; + } + + const u32 bytes_per_pixel = base_bpp << bpp_shift; const u32 height = dst_params.height; const u32 depth = dst_params.depth; const u32 block_height = dst_params.block_size.height; const u32 block_depth = dst_params.block_size.depth; const size_t dst_size = CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); - const size_t dst_layer_size = - CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth); - const size_t src_size = static_cast(regs.pitch_in) * regs.line_count; if (read_buffer.size() < src_size) { @@ -188,32 +221,23 @@ void MaxwellDMA::CopyPitchToBlockLinear() { write_buffer.resize(dst_size); } + memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); if (Settings::IsGPULevelExtreme()) { - memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); } else { - memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size); memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); } // If the input is linear and the output is tiled, swizzle the input and copy it over. - if (regs.dst_params.block_size.depth > 0) { - ASSERT(dst_params.layer == 0); - SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height, - bytes_per_pixel, block_height, block_depth, dst_params.origin.x, - dst_params.origin.y, write_buffer.data(), read_buffer.data()); - } else { - SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel, - write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(), - block_height, dst_params.origin.x, dst_params.origin.y); - } + SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, + dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, + regs.pitch_in); memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::FastCopyBlockLinearToPitch() { - const u32 bytes_per_pixel = - regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1; + const u32 bytes_per_pixel = 1U; const size_t src_size = GOB_SIZE; const size_t dst_size = static_cast(regs.pitch_out) * regs.line_count; u32 pos_x = regs.src_params.origin.x; @@ -239,9 +263,10 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); } - UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width, - bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y, - write_buffer.data(), read_buffer.data()); + UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width, + regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count, + regs.src_params.block_size.height, regs.src_params.block_size.depth, + regs.pitch_out); memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); } diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 074bac92c..9c5d567a6 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -189,10 +189,16 @@ public: BitField<4, 3, Swizzle> dst_y; BitField<8, 3, Swizzle> dst_z; BitField<12, 3, Swizzle> dst_w; + BitField<0, 12, u32> dst_components_raw; BitField<16, 2, u32> component_size_minus_one; BitField<20, 2, u32> num_src_components_minus_one; BitField<24, 2, u32> num_dst_components_minus_one; }; + + Swizzle GetComponent(size_t i) { + const u32 raw = dst_components_raw; + return static_cast((raw >> (i * 3)) & 0x7); + } }; static_assert(sizeof(RemapConst) == 12); diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 5d8039841..b9ac41529 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -156,8 +156,9 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { const u32 block_height = static_cast(config.block_linear_height_log2); const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0); luma_buffer.resize(size); - Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(), - converted_frame_buf_addr, block_height, 0, 0); + std::span frame_buff(converted_frame_buf_addr, 4 * width * height); + Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, + 0, 0, width, height, block_height, 0, width * 4); host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); } else { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 4e52ce0fd..4a692448e 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -462,6 +462,97 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { MemoryOperation(gpu_addr, size, mapped_big, do_nothing, flush_short_pages); } +bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const { + bool result = false; + auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, + [[maybe_unused]] std::size_t offset, + [[maybe_unused]] std::size_t copy_amount) { return false; }; + + auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(page_table[page_index]) << cpu_page_bits) + offset; + result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount); + return result; + }; + auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; + result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount); + return result; + }; + auto check_short_pages = [&](std::size_t page_index, std::size_t offset, + std::size_t copy_amount) { + GPUVAddr base = (page_index << big_page_bits) + offset; + MemoryOperation(base, copy_amount, mapped_normal, do_nothing, do_nothing); + return result; + }; + MemoryOperation(gpu_addr, size, mapped_big, do_nothing, check_short_pages); + return result; +} + +size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const { + std::optional old_page_addr{}; + size_t range_so_far = 0; + bool result{false}; + auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, + std::size_t copy_amount) { + result = true; + return true; + }; + auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(page_table[page_index]) << cpu_page_bits) + offset; + if (old_page_addr && *old_page_addr != cpu_addr_base) { + result = true; + return true; + } + range_so_far += copy_amount; + old_page_addr = {cpu_addr_base + copy_amount}; + return false; + }; + auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; + if (old_page_addr && *old_page_addr != cpu_addr_base) { + return true; + } + range_so_far += copy_amount; + old_page_addr = {cpu_addr_base + copy_amount}; + return false; + }; + auto check_short_pages = [&](std::size_t page_index, std::size_t offset, + std::size_t copy_amount) { + GPUVAddr base = (page_index << big_page_bits) + offset; + MemoryOperation(base, copy_amount, short_check, fail, fail); + return result; + }; + MemoryOperation(gpu_addr, size, big_check, fail, check_short_pages); + return range_so_far; +} + +void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { + auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, + [[maybe_unused]] std::size_t offset, + [[maybe_unused]] std::size_t copy_amount) {}; + + auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(page_table[page_index]) << cpu_page_bits) + offset; + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + }; + auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + const VAddr cpu_addr_base = + (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + }; + auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, + std::size_t copy_amount) { + GPUVAddr base = (page_index << big_page_bits) + offset; + MemoryOperation(base, copy_amount, mapped_normal, do_nothing, do_nothing); + }; + MemoryOperation(gpu_addr, size, mapped_big, do_nothing, invalidate_short_pages); +} + void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) { std::vector tmp_buffer(size); ReadBlock(gpu_src_addr, tmp_buffer.data(), size); diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 8f8877a92..9c08edc20 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -104,6 +104,12 @@ public: void FlushRegion(GPUVAddr gpu_addr, size_t size) const; + void InvalidateRegion(GPUVAddr gpu_addr, size_t size) const; + + bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const; + + size_t MaxContinousRange(GPUVAddr gpu_addr, size_t size) const; + private: template inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index cb07f3d38..d2d40884c 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -129,7 +129,7 @@ public: [[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0; virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, - std::span memory) = 0; + std::span memory) = 0; /// Attempt to use a faster method to display the framebuffer to screen [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 02bb17715..c2d80605d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -476,7 +476,7 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() } void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, - std::span memory) { + std::span memory) { auto cpu_addr = gpu_memory->GpuToCpuAddress(address); if (!cpu_addr) [[unlikely]] { gpu_memory->WriteBlock(address, memory.data(), copy_size); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index fe0ba979a..45131b785 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -99,7 +99,7 @@ public: const Tegra::Engines::Fermi2D::Config& copy_config) override; Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, - std::span memory) override; + std::span memory) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; void LoadDiskResources(u64 title_id, std::stop_token stop_loading, diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index f17a5ccd6..241d7573e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -26,8 +26,6 @@ namespace Vulkan { -using Tegra::Texture::SWIZZLE_TABLE; - namespace { constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a35e41199..acfd5da7d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -548,7 +548,7 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() } void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, - std::span memory) { + std::span memory) { auto cpu_addr = gpu_memory->GpuToCpuAddress(address); if (!cpu_addr) [[unlikely]] { gpu_memory->WriteBlock(address, memory.data(), copy_size); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index fb9e83e8f..4cde3c983 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -95,7 +95,7 @@ public: const Tegra::Engines::Fermi2D::Config& copy_config) override; Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, - std::span memory) override; + std::span memory) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; void LoadDiskResources(u64 title_id, std::stop_token stop_loading, diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index bea1c27d0..1223df5a0 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -517,7 +517,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; UNIMPLEMENTED_IF(info.tile_width_spacing > 0); - UNIMPLEMENTED_IF(copy.image_offset.x != 0); UNIMPLEMENTED_IF(copy.image_offset.y != 0); UNIMPLEMENTED_IF(copy.image_offset.z != 0); diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 913f8ebcb..fcc636e0b 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -89,6 +89,69 @@ void SwizzleImpl(std::span output, std::span input, u32 width, u32 } } +template +void SwizzleSubrectImpl(std::span output, std::span input, u32 width, u32 height, + u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 num_lines, + u32 block_height, u32 block_depth, u32 pitch_linear) { + // The origin of the transformation can be configured here, leave it as zero as the current API + // doesn't expose it. + static constexpr u32 origin_z = 0; + + // We can configure here a custom pitch + // As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch. + const u32 pitch = pitch_linear; + const u32 stride = Common::AlignUpLog2(width * BYTES_PER_PIXEL, GOB_SIZE_X_SHIFT); + + const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); + const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); + const u32 slice_size = + Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size; + + const u32 block_height_mask = (1U << block_height) - 1; + const u32 block_depth_mask = (1U << block_depth) - 1; + const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth; + + u32 unprocessed_lines = num_lines; + u32 extent_y = std::min(num_lines, height - origin_y); + + for (u32 slice = 0; slice < depth; ++slice) { + const u32 z = slice + origin_z; + const u32 offset_z = (z >> block_depth) * slice_size + + ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height)); + const u32 lines_in_y = std::min(unprocessed_lines, extent_y); + for (u32 line = 0; line < lines_in_y; ++line) { + const u32 y = line + origin_y; + const u32 swizzled_y = pdep(y); + + const u32 block_y = y >> GOB_SIZE_Y_SHIFT; + const u32 offset_y = (block_y >> block_height) * block_size + + ((block_y & block_height_mask) << GOB_SIZE_SHIFT); + + u32 swizzled_x = pdep(origin_x * BYTES_PER_PIXEL); + for (u32 column = 0; column < extent_x; + ++column, incrpdep(swizzled_x)) { + const u32 x = (column + origin_x) * BYTES_PER_PIXEL; + const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift; + + const u32 base_swizzled_offset = offset_z + offset_y + offset_x; + const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y); + + const u32 unswizzled_offset = + slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL; + + u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; + const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset]; + + std::memcpy(dst, src, BYTES_PER_PIXEL); + } + } + unprocessed_lines -= lines_in_y; + if (unprocessed_lines == 0) { + return; + } + } +} + template void Swizzle(std::span output, std::span input, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { @@ -111,97 +174,6 @@ void Swizzle(std::span output, std::span input, u32 bytes_per_pixe } } -template -void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit, - u32 offset_x, u32 offset_y) { - const u32 block_height = 1U << block_height_bit; - const u32 image_width_in_gobs = - (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X; - for (u32 line = 0; line < subrect_height; ++line) { - const u32 dst_y = line + offset_y; - const u32 gob_address_y = - (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + - ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; - - const u32 swizzled_y = pdep(dst_y); - u32 swizzled_x = pdep(offset_x * BYTES_PER_PIXEL); - for (u32 x = 0; x < subrect_width; - ++x, incrpdep(swizzled_x)) { - const u32 dst_x = x + offset_x; - const u32 gob_address = - gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height; - const u32 swizzled_offset = gob_address + (swizzled_x | swizzled_y); - const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL; - - const u8* const source_line = unswizzled_data + unswizzled_offset; - u8* const dest_addr = swizzled_data + swizzled_offset; - std::memcpy(dest_addr, source_line, BYTES_PER_PIXEL); - } - } -} - -template -void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height, - u32 origin_x, u32 origin_y, u8* output, const u8* input) { - const u32 stride = width * BYTES_PER_PIXEL; - const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; - const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); - - const u32 block_height_mask = (1U << block_height) - 1; - const u32 x_shift = GOB_SIZE_SHIFT + block_height; - - for (u32 line = 0; line < line_count; ++line) { - const u32 src_y = line + origin_y; - const u32 swizzled_y = pdep(src_y); - - const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; - const u32 src_offset_y = (block_y >> block_height) * block_size + - ((block_y & block_height_mask) << GOB_SIZE_SHIFT); - - u32 swizzled_x = pdep(origin_x * BYTES_PER_PIXEL); - for (u32 column = 0; column < line_length_in; - ++column, incrpdep(swizzled_x)) { - const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL; - const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift; - - const u32 swizzled_offset = src_offset_y + src_offset_x + (swizzled_x | swizzled_y); - const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL; - - std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL); - } - } -} - -template -void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, - u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output, - const u8* input) { - UNIMPLEMENTED_IF(origin_x > 0); - UNIMPLEMENTED_IF(origin_y > 0); - - const u32 stride = width * BYTES_PER_PIXEL; - const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; - const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); - - const u32 block_height_mask = (1U << block_height) - 1; - const u32 x_shift = static_cast(GOB_SIZE_SHIFT) + block_height + block_depth; - - for (u32 line = 0; line < line_count; ++line) { - const u32 swizzled_y = pdep(line); - const u32 block_y = line / GOB_SIZE_Y; - const u32 dst_offset_y = - (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; - - u32 swizzled_x = 0; - for (u32 x = 0; x < line_length_in; ++x, incrpdep(swizzled_x)) { - const u32 dst_offset = - ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + (swizzled_x | swizzled_y); - const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch; - std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL); - } - } -} } // Anonymous namespace void UnswizzleTexture(std::span output, std::span input, u32 bytes_per_pixel, @@ -218,15 +190,15 @@ void SwizzleTexture(std::span output, std::span input, u32 bytes_p stride_alignment); } -void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, - u32 block_height_bit, u32 offset_x, u32 offset_y) { +void SwizzleSubrect(std::span output, std::span input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y, + u32 block_height, u32 block_depth, u32 pitch_linear) { switch (bytes_per_pixel) { #define BPP_CASE(x) \ case x: \ - return SwizzleSubrect(subrect_width, subrect_height, source_pitch, swizzled_width, \ - swizzled_data, unswizzled_data, block_height_bit, offset_x, \ - offset_y); + return SwizzleSubrectImpl(output, input, width, height, depth, origin_x, \ + origin_y, extent_x, extent_y, block_height, \ + block_depth, pitch_linear); BPP_CASE(1) BPP_CASE(2) BPP_CASE(3) @@ -241,13 +213,15 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 } } -void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, - u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { +void UnswizzleSubrect(std::span output, std::span input, u32 bytes_per_pixel, + u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, + u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear) { switch (bytes_per_pixel) { #define BPP_CASE(x) \ case x: \ - return UnswizzleSubrect(line_length_in, line_count, pitch, width, block_height, \ - origin_x, origin_y, output, input); + return SwizzleSubrectImpl(output, input, width, height, depth, origin_x, \ + origin_y, extent_x, extent_y, block_height, \ + block_depth, pitch_linear); BPP_CASE(1) BPP_CASE(2) BPP_CASE(3) @@ -262,55 +236,6 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, } } -void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, - u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, - u32 origin_y, u8* output, const u8* input) { - switch (bytes_per_pixel) { -#define BPP_CASE(x) \ - case x: \ - return SwizzleSliceToVoxel(line_length_in, line_count, pitch, width, height, \ - block_height, block_depth, origin_x, origin_y, output, \ - input); - BPP_CASE(1) - BPP_CASE(2) - BPP_CASE(3) - BPP_CASE(4) - BPP_CASE(6) - BPP_CASE(8) - BPP_CASE(12) - BPP_CASE(16) -#undef BPP_CASE - default: - ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel); - } -} - -void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, - const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, - u8* swizzle_data) { - const u32 block_height = 1U << block_height_bit; - const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X}; - std::size_t count = 0; - for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { - const std::size_t gob_address_y = - (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + - ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; - const u32 swizzled_y = pdep(static_cast(y)); - u32 swizzled_x = pdep(dst_x); - for (std::size_t x = dst_x; x < width && count < copy_size; - ++x, incrpdep(swizzled_x)) { - const std::size_t gob_address = - gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; - const std::size_t swizzled_offset = gob_address + (swizzled_x | swizzled_y); - const u8* source_line = source_data + count; - u8* dest_addr = swizzle_data + swizzled_offset; - count++; - - *dest_addr = *source_line; - } - } -} - std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth) { if (tiled) { diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 31a11708f..e70407692 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -40,7 +40,6 @@ constexpr SwizzleTable MakeSwizzleTable() { } return table; } -constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTable(); /// Unswizzles a block linear texture into linear memory. void UnswizzleTexture(std::span output, std::span input, u32 bytes_per_pixel, @@ -57,34 +56,14 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height u32 block_height, u32 block_depth); /// Copies an untiled subrectangle into a tiled surface. -void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, - u32 block_height_bit, u32 offset_x, u32 offset_y); +void SwizzleSubrect(std::span output, std::span input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y, + u32 block_height, u32 block_depth, u32 pitch_linear); /// Copies a tiled subrectangle into a linear surface. -void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, - u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input); - -/// @brief Swizzles a 2D array of pixels into a 3D texture -/// @param line_length_in Number of pixels per line -/// @param line_count Number of lines -/// @param pitch Number of bytes per line -/// @param width Width of the swizzled texture -/// @param height Height of the swizzled texture -/// @param bytes_per_pixel Number of bytes used per pixel -/// @param block_height Block height shift -/// @param block_depth Block depth shift -/// @param origin_x Column offset in pixels of the swizzled texture -/// @param origin_y Row offset in pixels of the swizzled texture -/// @param output Pointer to the pixels of the swizzled texture -/// @param input Pointer to the 2D array of pixels used as input -/// @pre input and output points to an array large enough to hold the number of bytes used -void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, - u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, - u32 origin_y, u8* output, const u8* input); - -void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, - std::size_t copy_size, const u8* source_data, u8* swizzle_data); +void UnswizzleSubrect(std::span output, std::span input, u32 bytes_per_pixel, + u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, + u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear); /// Obtains the offset of the gob for positions 'dst_x' & 'dst_y' u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, -- cgit v1.2.3 From afab6c143cb486c7d14f1509cd04049ad08d3a65 Mon Sep 17 00:00:00 2001 From: Liam White Date: Wed, 13 Apr 2022 21:02:55 +0200 Subject: General: Fix compilation for GCC --- src/common/address_space.h | 10 +++--- src/common/algorithm.h | 2 +- src/common/bit_field.h | 13 +++----- src/common/multi_level_page_table.cpp | 4 +-- src/common/multi_level_page_table.inc | 2 +- src/core/hle/service/nvdrv/core/nvmap.cpp | 38 ++++++++++++++-------- src/core/hle/service/nvdrv/core/nvmap.h | 1 + .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 3 ++ src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 8 ++--- .../hle/service/nvdrv/devices/nvhost_nvdec.cpp | 4 +-- src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 4 +-- src/core/hle/service/nvdrv/devices/nvmap.cpp | 2 +- src/core/hle/service/nvdrv/nvdrv.h | 1 + src/core/hle/service/vi/vi.cpp | 1 + src/shader_recompiler/ir_opt/texture_pass.cpp | 2 +- src/video_core/buffer_cache/buffer_cache.h | 3 +- 16 files changed, 56 insertions(+), 42 deletions(-) (limited to 'src/common') diff --git a/src/common/address_space.h b/src/common/address_space.h index fd2f32b7d..8e13935af 100644 --- a/src/common/address_space.h +++ b/src/common/address_space.h @@ -22,7 +22,8 @@ struct EmptyStruct {}; */ template -requires AddressSpaceValid class FlatAddressSpaceMap { +requires AddressSpaceValid +class FlatAddressSpaceMap { private: std::function unmapCallback{}; //!< Callback called when the mappings in an region have changed @@ -40,8 +41,8 @@ protected: Block() = default; - Block(VaType virt, PaType phys, ExtraBlockInfo extraInfo) - : virt(virt), phys(phys), extraInfo(extraInfo) {} + Block(VaType virt_, PaType phys_, ExtraBlockInfo extraInfo_) + : virt(virt_), phys(phys_), extraInfo(extraInfo_) {} constexpr bool Valid() { return virt != UnmappedVa; @@ -102,7 +103,8 @@ public: * initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block */ template -requires AddressSpaceValid class FlatAllocator +requires AddressSpaceValid +class FlatAllocator : public FlatAddressSpaceMap { private: using Base = FlatAddressSpaceMap; diff --git a/src/common/algorithm.h b/src/common/algorithm.h index 055dca142..c27c9241d 100644 --- a/src/common/algorithm.h +++ b/src/common/algorithm.h @@ -27,7 +27,7 @@ template > template T FoldRight(T initial_value, Func&& func, Args&&... args) { T value{initial_value}; - const auto high_func = [&value, &func](T x) { value = func(value, x); }; + const auto high_func = [&value, &func](U x) { value = func(value, x); }; (std::invoke(high_func, std::forward(args)), ...); return value; } diff --git a/src/common/bit_field.h b/src/common/bit_field.h index 368b7b98c..7e1df62b1 100644 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h @@ -127,14 +127,11 @@ public: } } - BitField(T val) { - Assign(val); - } - - BitField& operator=(T val) { - Assign(val); - return *this; - } + // This constructor and assignment operator might be considered ambiguous: + // Would they initialize the storage or just the bitfield? + // Hence, delete them. Use the Assign method to set bitfield values! + BitField(T val) = delete; + BitField& operator=(T val) = delete; constexpr BitField() noexcept = default; diff --git a/src/common/multi_level_page_table.cpp b/src/common/multi_level_page_table.cpp index aed04d0b5..3a7a75aa7 100644 --- a/src/common/multi_level_page_table.cpp +++ b/src/common/multi_level_page_table.cpp @@ -1,8 +1,6 @@ #include "common/multi_level_page_table.inc" namespace Common { -template class Common::MultiLevelPageTable; -template class Common::MultiLevelPageTable; -template class Common::MultiLevelPageTable; +template class Common::MultiLevelPageTable; template class Common::MultiLevelPageTable; } // namespace Common diff --git a/src/common/multi_level_page_table.inc b/src/common/multi_level_page_table.inc index 9a68cad93..4def6dba8 100644 --- a/src/common/multi_level_page_table.inc +++ b/src/common/multi_level_page_table.inc @@ -30,7 +30,7 @@ MultiLevelPageTable::MultiLevelPageTable(std::size_t address_space_bit #ifdef _WIN32 void* base{VirtualAlloc(nullptr, alloc_size, MEM_RESERVE, PAGE_READWRITE)}; #else - void* base{mmap(nullptr, alloc_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)}; + void* base{mmap(nullptr, alloc_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)}; if (base == MAP_FAILED) { base = nullptr; diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp index 86d825af9..b02dbb9c9 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.cpp +++ b/src/core/hle/service/nvdrv/core/nvmap.cpp @@ -13,7 +13,8 @@ using Core::Memory::YUZU_PAGESIZE; namespace Service::Nvidia::NvCore { -NvMap::Handle::Handle(u64 size, Id id) : size(size), aligned_size(size), orig_size(size), id(id) { +NvMap::Handle::Handle(u64 size_, Id id_) + : size(size_), aligned_size(size), orig_size(size), id(id_) { flags.raw = 0; } @@ -21,19 +22,21 @@ NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) std::scoped_lock lock(mutex); // Handles cannot be allocated twice - if (allocated) + if (allocated) { return NvResult::AccessDenied; + } flags = pFlags; kind = pKind; align = pAlign < YUZU_PAGESIZE ? YUZU_PAGESIZE : pAlign; // This flag is only applicable for handles with an address passed - if (pAddress) - flags.keep_uncached_after_free = 0; - else + if (pAddress) { + flags.keep_uncached_after_free.Assign(0); + } else { LOG_CRITICAL(Service_NVDRV, "Mapping nvmap handles without a CPU side address is unimplemented!"); + } size = Common::AlignUp(size, YUZU_PAGESIZE); aligned_size = Common::AlignUp(size, align); @@ -48,17 +51,19 @@ NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) NvResult NvMap::Handle::Duplicate(bool internal_session) { // Unallocated handles cannot be duplicated as duplication requires memory accounting (in HOS) - if (!allocated) [[unlikely]] + if (!allocated) [[unlikely]] { return NvResult::BadValue; + } std::scoped_lock lock(mutex); // If we internally use FromId the duplication tracking of handles won't work accurately due to // us not implementing per-process handle refs. - if (internal_session) + if (internal_session) { internal_dupes++; - else + } else { dupes++; + } return NvResult::Success; } @@ -92,8 +97,9 @@ bool NvMap::TryRemoveHandle(const Handle& handle_description) { std::scoped_lock lock(handles_lock); auto it{handles.find(handle_description.id)}; - if (it != handles.end()) + if (it != handles.end()) { handles.erase(it); + } return true; } else { @@ -102,8 +108,9 @@ bool NvMap::TryRemoveHandle(const Handle& handle_description) { } NvResult NvMap::CreateHandle(u64 size, std::shared_ptr& result_out) { - if (!size) [[unlikely]] + if (!size) [[unlikely]] { return NvResult::BadValue; + } u32 id{next_handle_id.fetch_add(HandleIdIncrement, std::memory_order_relaxed)}; auto handle_description{std::make_shared(size, id)}; @@ -133,8 +140,9 @@ VAddr NvMap::GetHandleAddress(Handle::Id handle) { u32 NvMap::PinHandle(NvMap::Handle::Id handle) { auto handle_description{GetHandle(handle)}; - if (!handle_description) [[unlikely]] + if (!handle_description) [[unlikely]] { return 0; + } std::scoped_lock lock(handle_description->mutex); if (!handle_description->pins) { @@ -183,8 +191,9 @@ u32 NvMap::PinHandle(NvMap::Handle::Id handle) { void NvMap::UnpinHandle(Handle::Id handle) { auto handle_description{GetHandle(handle)}; - if (!handle_description) + if (!handle_description) { return; + } std::scoped_lock lock(handle_description->mutex); if (--handle_description->pins < 0) { @@ -226,12 +235,13 @@ std::optional NvMap::FreeHandle(Handle::Id handle, bool interna // Try to remove the shared ptr to the handle from the map, if nothing else is using the // handle then it will now be freed when `handle_description` goes out of scope - if (TryRemoveHandle(*handle_description)) + if (TryRemoveHandle(*handle_description)) { LOG_DEBUG(Service_NVDRV, "Removed nvmap handle: {}", handle); - else + } else { LOG_DEBUG(Service_NVDRV, "Tried to free nvmap handle: {} but didn't as it still has duplicates", handle); + } freeInfo = { .address = handle_description->address, diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h index 4f37dcf43..1082bb58d 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.h +++ b/src/core/hle/service/nvdrv/core/nvmap.h @@ -5,6 +5,7 @@ #pragma once +#include #include #include #include diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index d95a88393..d1beefba6 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -188,6 +188,7 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector& input, std::vector< allocation_map[params.offset] = { .size = size, + .mappings{}, .page_size = params.page_size, .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None, .big_pages = params.page_size != VM::YUZU_PAGESIZE, @@ -474,11 +475,13 @@ void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) { VaRegion{ .offset = vm.small_page_allocator->vaStart << VM::PAGE_SIZE_BITS, .page_size = VM::YUZU_PAGESIZE, + ._pad0_{}, .pages = vm.small_page_allocator->vaLimit - vm.small_page_allocator->vaStart, }, VaRegion{ .offset = vm.big_page_allocator->vaStart << vm.big_page_size_bits, .page_size = vm.big_page_size, + ._pad0_{}, .pages = vm.big_page_allocator->vaLimit - vm.big_page_allocator->vaStart, }, }; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index a84e4d425..7fffb8e48 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -204,12 +204,12 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector& input, std::vector event.wait_handle = host1x_syncpoint_manager.RegisterHostAction(fence_id, target_value, [this, slot]() { - auto& event = events[slot]; - if (event.status.exchange(EventState::Signalling, std::memory_order_acq_rel) == + auto& event_ = events[slot]; + if (event_.status.exchange(EventState::Signalling, std::memory_order_acq_rel) == EventState::Waiting) { - event.kevent->GetWritableEvent().Signal(); + event_.kevent->GetWritableEvent().Signal(); } - event.status.store(EventState::Signalled, std::memory_order_release); + event_.status.store(EventState::Signalled, std::memory_order_release); }); return NvResult::Timeout; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index 5e3820085..fed537039 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -12,8 +12,8 @@ namespace Service::Nvidia::Devices { u32 nvhost_nvdec::next_id{}; -nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core) - : nvhost_nvdec_common{system_, core, NvCore::ChannelType::NvDec} {} +nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core_) + : nvhost_nvdec_common{system_, core_, NvCore::ChannelType::NvDec} {} nvhost_nvdec::~nvhost_nvdec() = default; NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector& input, diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 490e399f4..2e4ff988c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -11,8 +11,8 @@ namespace Service::Nvidia::Devices { u32 nvhost_vic::next_id{}; -nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core) - : nvhost_nvdec_common{system_, core, NvCore::ChannelType::VIC} {} +nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core_) + : nvhost_nvdec_common{system_, core_, NvCore::ChannelType::VIC} {} nvhost_vic::~nvhost_vic() = default; diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 992c117f1..f84fc8c37 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp @@ -269,7 +269,7 @@ NvResult nvmap::IocFree(const std::vector& input, std::vector& output) { params.address = freeInfo->address; params.size = static_cast(freeInfo->size); params.flags.raw = 0; - params.flags.map_uncached = freeInfo->was_uncached; + params.flags.map_uncached.Assign(freeInfo->was_uncached); } else { // This is possible when there's internel dups or other duplicates. } diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 31c45236e..b26254753 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h @@ -6,6 +6,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index f083811ec..9c917cacf 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -58,6 +58,7 @@ static_assert(sizeof(DisplayInfo) == 0x60, "DisplayInfo has wrong size"); class NativeWindow final { public: constexpr explicit NativeWindow(u32 id_) : id{id_} {} + constexpr explicit NativeWindow(const NativeWindow& other) = default; private: const u32 magic = 2; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 0726d4d21..e8be58357 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -269,7 +269,7 @@ std::optional TryGetConstBuffer(const IR::Inst* inst, Environme } std::optional lhs{Track(op1, env)}; if (lhs) { - lhs->shift_left = std::countr_zero(op2.U32()); + lhs->shift_left = static_cast(std::countr_zero(op2.U32())); } return lhs; break; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2e616cee4..8e26b3f95 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1323,7 +1323,8 @@ void BufferCache

::UpdateVertexBuffer(u32 index) { return; } if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { - address_size = gpu_memory->MaxContinousRange(gpu_addr_begin, address_size); + address_size = + static_cast(gpu_memory->MaxContinousRange(gpu_addr_begin, address_size)); } const u32 size = address_size; // TODO: Analyze stride and number of vertices vertex_buffers[index] = Binding{ -- cgit v1.2.3 From 1a9b71b1c6a5b6fb2a41fc485a986e9c505b2856 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 16 Jun 2022 02:00:29 +0200 Subject: Common: Fix variable shadowing. --- src/common/address_space.inc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/common') diff --git a/src/common/address_space.inc b/src/common/address_space.inc index 907c55d88..e1241d099 100644 --- a/src/common/address_space.inc +++ b/src/common/address_space.inc @@ -30,9 +30,9 @@ FlatAllocator namespace Common { -MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType vaLimit, - std::function unmapCallback) - : unmapCallback(std::move(unmapCallback)), vaLimit(vaLimit) { +MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType vaLimit_, + std::function unmapCallback_) + : unmapCallback(std::move(unmapCallback_)), vaLimit(vaLimit_) { if (vaLimit > VaMaximum) UNREACHABLE_MSG("Invalid VA limit!"); } @@ -261,8 +261,8 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { unmapCallback(virt, size); } -ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart, VaType vaLimit) - : Base(vaLimit), currentLinearAllocEnd(vaStart), vaStart(vaStart) {} +ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart_, VaType vaLimit) + : Base(vaLimit), currentLinearAllocEnd(vaStart_), vaStart(vaStart_) {} ALLOC_MEMBER(VaType)::Allocate(VaType size) { std::scoped_lock lock(this->blockMutex); -- cgit v1.2.3 From fe24c65153349d3a759a2eef02ec703851a96847 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 16 Jun 2022 02:12:21 +0200 Subject: General: Fix clang format. --- src/common/address_space.inc | 4 ++-- src/video_core/control/channel_state_cache.h | 1 + src/video_core/host1x/codecs/vp9.cpp | 5 +++-- src/video_core/host1x/syncpoint_manager.h | 4 ++-- src/video_core/host1x/vic.cpp | 4 ++-- src/video_core/memory_manager.cpp | 1 - src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13 ++++--------- 7 files changed, 14 insertions(+), 18 deletions(-) (limited to 'src/common') diff --git a/src/common/address_space.inc b/src/common/address_space.inc index e1241d099..7cfbb150b 100644 --- a/src/common/address_space.inc +++ b/src/common/address_space.inc @@ -261,8 +261,8 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { unmapCallback(virt, size); } -ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart_, VaType vaLimit) - : Base(vaLimit), currentLinearAllocEnd(vaStart_), vaStart(vaStart_) {} +ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart_, VaType vaLimit_) + : Base(vaLimit_), currentLinearAllocEnd(vaStart_), vaStart(vaStart_) {} ALLOC_MEMBER(VaType)::Allocate(VaType size) { std::scoped_lock lock(this->blockMutex); diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h index dbf833de7..102947adb 100644 --- a/src/video_core/control/channel_state_cache.h +++ b/src/video_core/control/channel_state_cache.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "common/common_types.h" diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp index 667aadc6a..cf40c9012 100644 --- a/src/video_core/host1x/codecs/vp9.cpp +++ b/src/video_core/host1x/codecs/vp9.cpp @@ -382,8 +382,9 @@ Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters // gpu.SyncGuestHost(); epic, why? current_frame.info = GetVp9PictureInfo(state); current_frame.bit_stream.resize(current_frame.info.bitstream_size); - host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(), - current_frame.info.bitstream_size); + host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, + current_frame.bit_stream.data(), + current_frame.info.bitstream_size); } if (!next_frame.bit_stream.empty()) { Vp9FrameContainer temp{ diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h index 0ecc040ab..440b1508a 100644 --- a/src/video_core/host1x/syncpoint_manager.h +++ b/src/video_core/host1x/syncpoint_manager.h @@ -49,9 +49,9 @@ public: expected_value, func); } - void DeregisterGuestAction(u32 syncpoint_id,ActionHandle& handle); + void DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle); - void DeregisterHostAction(u32 syncpoint_id,ActionHandle& handle); + void DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle); void IncrementGuest(u32 syncpoint_id); diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index b9ac41529..ac0b7d20e 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -157,8 +157,8 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0); luma_buffer.resize(size); std::span frame_buff(converted_frame_buf_addr, 4 * width * height); - Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, - 0, 0, width, height, block_height, 0, width * 4); + Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height, + block_height, 0, width * 4); host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); } else { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 3cb2d9224..cca401c74 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -619,7 +619,6 @@ bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const } bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const { - std::optional old_page_addr{}; bool result{true}; auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, [[maybe_unused]] std::size_t copy_amount) { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 51d9e8f68..d8131232a 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -95,19 +95,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), - telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), - gpu(gpu_), - library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), - memory_allocator(device, false), - state_tracker(), - scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), + state_tracker(), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, -- cgit v1.2.3 From fedd983f96bcbcc0c39f651db1cca0503d582fd9 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 29 Jun 2022 19:27:49 -0400 Subject: general: Format licenses as per SPDX guidelines --- src/common/address_space.cpp | 5 ++--- src/common/address_space.h | 5 ++--- src/common/address_space.inc | 4 ++-- src/common/multi_level_page_table.cpp | 3 +++ src/common/multi_level_page_table.h | 5 ++--- src/common/multi_level_page_table.inc | 5 ++--- src/core/hle/service/nvdrv/core/container.cpp | 7 +++---- src/core/hle/service/nvdrv/core/container.h | 7 +++---- src/core/hle/service/nvdrv/core/nvmap.cpp | 7 +++---- src/core/hle/service/nvdrv/core/nvmap.h | 7 +++---- src/core/hle/service/nvdrv/core/syncpoint_manager.cpp | 7 +++---- src/core/hle/service/nvdrv/core/syncpoint_manager.h | 7 +++---- src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 7 +++---- src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 7 +++---- src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 7 +++---- src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | 7 +++---- src/core/hle/service/nvdrv/nvdata.h | 7 +++---- src/core/hle/service/nvdrv/nvdrv.cpp | 7 +++---- src/core/hle/service/nvdrv/nvdrv.h | 7 +++---- src/core/hle/service/nvdrv/nvdrv_interface.cpp | 7 +++---- src/video_core/control/channel_state.cpp | 5 ++--- src/video_core/control/channel_state.h | 5 ++--- src/video_core/control/channel_state_cache.cpp | 3 +++ src/video_core/control/channel_state_cache.h | 5 ++--- src/video_core/control/channel_state_cache.inc | 2 ++ src/video_core/control/scheduler.cpp | 5 ++--- src/video_core/control/scheduler.h | 5 ++--- src/video_core/engines/puller.cpp | 5 ++--- src/video_core/engines/puller.h | 5 ++--- src/video_core/host1x/control.cpp | 5 ++--- src/video_core/host1x/control.h | 7 +++---- src/video_core/host1x/host1x.cpp | 5 ++--- src/video_core/host1x/host1x.h | 5 ++--- src/video_core/host1x/syncpoint_manager.cpp | 5 ++--- src/video_core/host1x/syncpoint_manager.h | 5 ++--- src/video_core/texture_cache/texture_cache.cpp | 5 ++--- src/video_core/texture_cache/texture_cache.h | 6 ++---- src/video_core/texture_cache/texture_cache_base.h | 6 ++---- 38 files changed, 93 insertions(+), 121 deletions(-) (limited to 'src/common') diff --git a/src/common/address_space.cpp b/src/common/address_space.cpp index 6db85be87..866e78dbe 100644 --- a/src/common/address_space.cpp +++ b/src/common/address_space.cpp @@ -1,6 +1,5 @@ -// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #include "common/address_space.inc" diff --git a/src/common/address_space.h b/src/common/address_space.h index 8e13935af..5b3832f07 100644 --- a/src/common/address_space.h +++ b/src/common/address_space.h @@ -1,6 +1,5 @@ -// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/common/address_space.inc b/src/common/address_space.inc index 7cfbb150b..a063782b3 100644 --- a/src/common/address_space.inc +++ b/src/common/address_space.inc @@ -1,5 +1,5 @@ -// SPDX-License-Identifier: GPLv3 or later -// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) +// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #include "common/address_space.h" #include "common/assert.h" diff --git a/src/common/multi_level_page_table.cpp b/src/common/multi_level_page_table.cpp index 3a7a75aa7..46e362f3b 100644 --- a/src/common/multi_level_page_table.cpp +++ b/src/common/multi_level_page_table.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + #include "common/multi_level_page_table.inc" namespace Common { diff --git a/src/common/multi_level_page_table.h b/src/common/multi_level_page_table.h index dde1cc962..08092c89a 100644 --- a/src/common/multi_level_page_table.h +++ b/src/common/multi_level_page_table.h @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later #pragma once diff --git a/src/common/multi_level_page_table.inc b/src/common/multi_level_page_table.inc index 4def6dba8..8ac506fa0 100644 --- a/src/common/multi_level_page_table.inc +++ b/src/common/multi_level_page_table.inc @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later #ifdef _WIN32 #include diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp index 4175d3d9c..d2a632646 100644 --- a/src/core/hle/service/nvdrv/core/container.cpp +++ b/src/core/hle/service/nvdrv/core/container.cpp @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #include "core/hle/service/nvdrv/core/container.h" #include "core/hle/service/nvdrv/core/nvmap.h" diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h index e069ade4e..5c8b95803 100644 --- a/src/core/hle/service/nvdrv/core/container.h +++ b/src/core/hle/service/nvdrv/core/container.h @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp index 9b21da6b1..e63ec7717 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.cpp +++ b/src/core/hle/service/nvdrv/core/nvmap.cpp @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #include "common/alignment.h" #include "common/assert.h" diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h index ef2df3ad7..6d6dac023 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.h +++ b/src/core/hle/service/nvdrv/core/nvmap.h @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp index fc4ff3c2f..0bb2aec97 100644 --- a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp +++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #include "common/assert.h" #include "core/hle/service/nvdrv/core/syncpoint_manager.h" diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.h b/src/core/hle/service/nvdrv/core/syncpoint_manager.h index da456f206..6b71cd33d 100644 --- a/src/core/hle/service/nvdrv/core/syncpoint_manager.h +++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.h @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index d1beefba6..b48f7fcaf 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team, Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #include #include diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index 12e881f0d..86fe71c75 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team, Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index 7fffb8e48..5bee4a3d3 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team, Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #include #include diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index f511c0296..4aa738b41 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team, Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h index 2ee91f9c4..0e2f47075 100644 --- a/src/core/hle/service/nvdrv/nvdata.h +++ b/src/core/hle/service/nvdrv/nvdata.h @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 20bf24ec8..7929443d2 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 22836529d..a2aeb80b4 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp index 5e50a04e8..edbdfee43 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp +++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #include #include "common/logging/log.h" diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp index 3613c4992..b04922ac0 100644 --- a/src/video_core/control/channel_state.cpp +++ b/src/video_core/control/channel_state.cpp @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #include "common/assert.h" #include "video_core/control/channel_state.h" diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h index 08a7591e1..305b21cba 100644 --- a/src/video_core/control/channel_state.h +++ b/src/video_core/control/channel_state.h @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/video_core/control/channel_state_cache.cpp b/src/video_core/control/channel_state_cache.cpp index ec7ba907c..4ebeb6356 100644 --- a/src/video_core/control/channel_state_cache.cpp +++ b/src/video_core/control/channel_state_cache.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + #include "video_core/control/channel_state_cache.inc" namespace VideoCommon { diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h index 102947adb..5246192a8 100644 --- a/src/video_core/control/channel_state_cache.h +++ b/src/video_core/control/channel_state_cache.h @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/video_core/control/channel_state_cache.inc b/src/video_core/control/channel_state_cache.inc index d3ae758b2..460313893 100644 --- a/src/video_core/control/channel_state_cache.inc +++ b/src/video_core/control/channel_state_cache.inc @@ -1,3 +1,5 @@ +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp index a9bb00aa7..733042690 100644 --- a/src/video_core/control/scheduler.cpp +++ b/src/video_core/control/scheduler.cpp @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h index c1a773946..305a01e0a 100644 --- a/src/video_core/control/scheduler.h +++ b/src/video_core/control/scheduler.h @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index c3ed11c13..cca890792 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #include "common/assert.h" #include "common/logging/log.h" diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h index b4619e9a8..d4175ee94 100644 --- a/src/video_core/engines/puller.h +++ b/src/video_core/engines/puller.h @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/video_core/host1x/control.cpp b/src/video_core/host1x/control.cpp index a81c635ae..dceefdb7f 100644 --- a/src/video_core/host1x/control.cpp +++ b/src/video_core/host1x/control.cpp @@ -1,6 +1,5 @@ -// Copyright 2022 yuzu Emulator Project -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #include "common/assert.h" #include "video_core/host1x/control.h" diff --git a/src/video_core/host1x/control.h b/src/video_core/host1x/control.h index 18a9b56c0..e117888a3 100644 --- a/src/video_core/host1x/control.h +++ b/src/video_core/host1x/control.h @@ -1,7 +1,6 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp index eb00f4855..7c317a85d 100644 --- a/src/video_core/host1x/host1x.cpp +++ b/src/video_core/host1x/host1x.cpp @@ -1,6 +1,5 @@ -// Copyright 2022 yuzu Emulator Project -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #include "core/core.h" #include "video_core/host1x/host1x.h" diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h index e4b69d75a..7ecf853d9 100644 --- a/src/video_core/host1x/host1x.h +++ b/src/video_core/host1x/host1x.h @@ -1,6 +1,5 @@ -// Copyright 2022 yuzu Emulator Project -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp index 825bd551e..4471bacae 100644 --- a/src/video_core/host1x/syncpoint_manager.cpp +++ b/src/video_core/host1x/syncpoint_manager.cpp @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #include "common/microprofile.h" #include "video_core/host1x/syncpoint_manager.h" diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h index 440b1508a..72220a09a 100644 --- a/src/video_core/host1x/syncpoint_manager.h +++ b/src/video_core/host1x/syncpoint_manager.h @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index bc905a1a4..8a9a32f44 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #include "video_core/control/channel_state_cache.inc" #include "video_core/texture_cache/texture_cache_base.h" diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9a835cefc..eaf4a1c95 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1,7 +1,5 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 2f4db5047..2fa8445eb 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -1,7 +1,5 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once -- cgit v1.2.3 From fa342cae227666c861806b9bf63e4286aff1e4d7 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 29 Jun 2022 20:33:04 -0400 Subject: address_space: Address feedback --- src/common/address_space.h | 105 ++++--- src/common/address_space.inc | 319 +++++++++++---------- .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 8 +- 3 files changed, 237 insertions(+), 195 deletions(-) (limited to 'src/common') diff --git a/src/common/address_space.h b/src/common/address_space.h index 5b3832f07..bf649018c 100644 --- a/src/common/address_space.h +++ b/src/common/address_space.h @@ -23,9 +23,29 @@ template requires AddressSpaceValid class FlatAddressSpaceMap { -private: - std::function - unmapCallback{}; //!< Callback called when the mappings in an region have changed +public: + /// The maximum VA that this AS can technically reach + static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) + + ((1ULL << (AddressSpaceBits - 1)) - 1)}; + + explicit FlatAddressSpaceMap(VaType va_limit, + std::function unmap_callback = {}); + + FlatAddressSpaceMap() = default; + + void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info = {}) { + std::scoped_lock lock(block_mutex); + MapLocked(virt, phys, size, extra_info); + } + + void Unmap(VaType virt, VaType size) { + std::scoped_lock lock(block_mutex); + UnmapLocked(virt, size); + } + + VaType GetVALimit() const { + return va_limit; + } protected: /** @@ -33,68 +53,55 @@ protected: * another block with a different phys address is hit */ struct Block { - VaType virt{UnmappedVa}; //!< VA of the block - PaType phys{UnmappedPa}; //!< PA of the block, will increase 1-1 with VA until a new block - //!< is encountered - [[no_unique_address]] ExtraBlockInfo extraInfo; + /// VA of the block + VaType virt{UnmappedVa}; + /// PA of the block, will increase 1-1 with VA until a new block is encountered + PaType phys{UnmappedPa}; + [[no_unique_address]] ExtraBlockInfo extra_info; Block() = default; - Block(VaType virt_, PaType phys_, ExtraBlockInfo extraInfo_) - : virt(virt_), phys(phys_), extraInfo(extraInfo_) {} + Block(VaType virt_, PaType phys_, ExtraBlockInfo extra_info_) + : virt(virt_), phys(phys_), extra_info(extra_info_) {} - constexpr bool Valid() { + bool Valid() const { return virt != UnmappedVa; } - constexpr bool Mapped() { + bool Mapped() const { return phys != UnmappedPa; } - constexpr bool Unmapped() { + bool Unmapped() const { return phys == UnmappedPa; } - bool operator<(const VaType& pVirt) const { - return virt < pVirt; + bool operator<(const VaType& p_virt) const { + return virt < p_virt; } }; - std::mutex blockMutex; - std::vector blocks{Block{}}; - /** * @brief Maps a PA range into the given AS region - * @note blockMutex MUST be locked when calling this + * @note block_mutex MUST be locked when calling this */ - void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo); + void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info); /** * @brief Unmaps the given range and merges it with other unmapped regions - * @note blockMutex MUST be locked when calling this + * @note block_mutex MUST be locked when calling this */ void UnmapLocked(VaType virt, VaType size); -public: - static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) + - ((1ULL << (AddressSpaceBits - 1)) - - 1)}; //!< The maximum VA that this AS can technically reach - - VaType vaLimit{VaMaximum}; //!< A soft limit on the maximum VA of the AS - - FlatAddressSpaceMap(VaType vaLimit, std::function unmapCallback = {}); - - FlatAddressSpaceMap() = default; + std::mutex block_mutex; + std::vector blocks{Block{}}; - void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo = {}) { - std::scoped_lock lock(blockMutex); - MapLocked(virt, phys, size, extraInfo); - } + /// a soft limit on the maximum VA of the AS + VaType va_limit{VaMaximum}; - void Unmap(VaType virt, VaType size) { - std::scoped_lock lock(blockMutex); - UnmapLocked(virt, size); - } +private: + /// Callback called when the mappings in an region have changed + std::function unmap_callback{}; }; /** @@ -108,14 +115,8 @@ class FlatAllocator private: using Base = FlatAddressSpaceMap; - VaType currentLinearAllocEnd; //!< The end address for the initial linear allocation pass, once - //!< this reaches the AS limit the slower allocation path will be - //!< used - public: - VaType vaStart; //!< The base VA of the allocator, no allocations will be below this - - FlatAllocator(VaType vaStart, VaType vaLimit = Base::VaMaximum); + explicit FlatAllocator(VaType va_start, VaType va_limit = Base::VaMaximum); /** * @brief Allocates a region in the AS of the given size and returns its address @@ -131,5 +132,19 @@ public: * @brief Frees an AS region so it can be used again */ void Free(VaType virt, VaType size); + + VaType GetVAStart() const { + return va_start; + } + +private: + /// The base VA of the allocator, no allocations will be below this + VaType va_start; + + /** + * The end address for the initial linear allocation pass + * Once this reaches the AS limit the slower allocation path will be used + */ + VaType current_linear_alloc_end; }; } // namespace Common diff --git a/src/common/address_space.inc b/src/common/address_space.inc index a063782b3..3661b298e 100644 --- a/src/common/address_space.inc +++ b/src/common/address_space.inc @@ -30,137 +30,151 @@ FlatAllocator namespace Common { -MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType vaLimit_, - std::function unmapCallback_) - : unmapCallback(std::move(unmapCallback_)), vaLimit(vaLimit_) { - if (vaLimit > VaMaximum) +MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType va_limit_, + std::function unmap_callback_) + : va_limit{va_limit_}, unmap_callback{std::move(unmap_callback_)} { + if (va_limit > VaMaximum) { UNREACHABLE_MSG("Invalid VA limit!"); + } } -MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo) { - VaType virtEnd{virt + size}; +MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info) { + VaType virt_end{virt + size}; - if (virtEnd > vaLimit) - UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}", - virtEnd, vaLimit); + if (virt_end > va_limit) { + UNREACHABLE_MSG( + "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}", virt_end, + va_limit); + } - auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)}; - if (blockEndSuccessor == blocks.begin()) - UNREACHABLE_MSG("Trying to map a block before the VA start: virtEnd: 0x{:X}", virtEnd); + auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)}; + if (block_end_successor == blocks.begin()) { + UNREACHABLE_MSG("Trying to map a block before the VA start: virt_end: 0x{:X}", virt_end); + } - auto blockEndPredecessor{std::prev(blockEndSuccessor)}; + auto block_end_predecessor{std::prev(block_end_successor)}; - if (blockEndSuccessor != blocks.end()) { + if (block_end_successor != blocks.end()) { // We have blocks in front of us, if one is directly in front then we don't have to add a // tail - if (blockEndSuccessor->virt != virtEnd) { + if (block_end_successor->virt != virt_end) { PaType tailPhys{[&]() -> PaType { if constexpr (!PaContigSplit) { - return blockEndPredecessor - ->phys; // Always propagate unmapped regions rather than calculating offset + // Always propagate unmapped regions rather than calculating offset + return block_end_predecessor->phys; } else { - if (blockEndPredecessor->Unmapped()) - return blockEndPredecessor->phys; // Always propagate unmapped regions - // rather than calculating offset - else - return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt; + if (block_end_predecessor->Unmapped()) { + // Always propagate unmapped regions rather than calculating offset + return block_end_predecessor->phys; + } else { + return block_end_predecessor->phys + virt_end - block_end_predecessor->virt; + } } }()}; - if (blockEndPredecessor->virt >= virt) { + if (block_end_predecessor->virt >= virt) { // If this block's start would be overlapped by the map then reuse it as a tail // block - blockEndPredecessor->virt = virtEnd; - blockEndPredecessor->phys = tailPhys; - blockEndPredecessor->extraInfo = blockEndPredecessor->extraInfo; + block_end_predecessor->virt = virt_end; + block_end_predecessor->phys = tailPhys; + block_end_predecessor->extra_info = block_end_predecessor->extra_info; // No longer predecessor anymore - blockEndSuccessor = blockEndPredecessor--; + block_end_successor = block_end_predecessor--; } else { // Else insert a new one and we're done - blocks.insert(blockEndSuccessor, - {Block(virt, phys, extraInfo), - Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)}); - if (unmapCallback) - unmapCallback(virt, size); + blocks.insert(block_end_successor, + {Block(virt, phys, extra_info), + Block(virt_end, tailPhys, block_end_predecessor->extra_info)}); + if (unmap_callback) { + unmap_callback(virt, size); + } return; } } } else { - // blockEndPredecessor will always be unmapped as blocks has to be terminated by an unmapped - // chunk - if (blockEndPredecessor != blocks.begin() && blockEndPredecessor->virt >= virt) { + // block_end_predecessor will always be unmapped as blocks has to be terminated by an + // unmapped chunk + if (block_end_predecessor != blocks.begin() && block_end_predecessor->virt >= virt) { // Move the unmapped block start backwards - blockEndPredecessor->virt = virtEnd; + block_end_predecessor->virt = virt_end; // No longer predecessor anymore - blockEndSuccessor = blockEndPredecessor--; + block_end_successor = block_end_predecessor--; } else { // Else insert a new one and we're done - blocks.insert(blockEndSuccessor, - {Block(virt, phys, extraInfo), Block(virtEnd, UnmappedPa, {})}); - if (unmapCallback) - unmapCallback(virt, size); + blocks.insert(block_end_successor, + {Block(virt, phys, extra_info), Block(virt_end, UnmappedPa, {})}); + if (unmap_callback) { + unmap_callback(virt, size); + } return; } } - auto blockStartSuccessor{blockEndSuccessor}; + auto block_start_successor{block_end_successor}; // Walk the block vector to find the start successor as this is more efficient than another // binary search in most scenarios - while (std::prev(blockStartSuccessor)->virt >= virt) - blockStartSuccessor--; + while (std::prev(block_start_successor)->virt >= virt) { + block_start_successor--; + } // Check that the start successor is either the end block or something in between - if (blockStartSuccessor->virt > virtEnd) { - UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt); - } else if (blockStartSuccessor->virt == virtEnd) { + if (block_start_successor->virt > virt_end) { + UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt); + } else if (block_start_successor->virt == virt_end) { // We need to create a new block as there are none spare that we would overwrite - blocks.insert(blockStartSuccessor, Block(virt, phys, extraInfo)); + blocks.insert(block_start_successor, Block(virt, phys, extra_info)); } else { // Erase overwritten blocks - if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor) - blocks.erase(eraseStart, blockEndSuccessor); + if (auto eraseStart{std::next(block_start_successor)}; eraseStart != block_end_successor) { + blocks.erase(eraseStart, block_end_successor); + } // Reuse a block that would otherwise be overwritten as a start block - blockStartSuccessor->virt = virt; - blockStartSuccessor->phys = phys; - blockStartSuccessor->extraInfo = extraInfo; + block_start_successor->virt = virt; + block_start_successor->phys = phys; + block_start_successor->extra_info = extra_info; } - if (unmapCallback) - unmapCallback(virt, size); + if (unmap_callback) { + unmap_callback(virt, size); + } } MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { - VaType virtEnd{virt + size}; + VaType virt_end{virt + size}; - if (virtEnd > vaLimit) - UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}", - virtEnd, vaLimit); + if (virt_end > va_limit) { + UNREACHABLE_MSG( + "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}", virt_end, + va_limit); + } - auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)}; - if (blockEndSuccessor == blocks.begin()) - UNREACHABLE_MSG("Trying to unmap a block before the VA start: virtEnd: 0x{:X}", virtEnd); + auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)}; + if (block_end_successor == blocks.begin()) { + UNREACHABLE_MSG("Trying to unmap a block before the VA start: virt_end: 0x{:X}", virt_end); + } - auto blockEndPredecessor{std::prev(blockEndSuccessor)}; + auto block_end_predecessor{std::prev(block_end_successor)}; - auto walkBackToPredecessor{[&](auto iter) { - while (iter->virt >= virt) + auto walk_back_to_predecessor{[&](auto iter) { + while (iter->virt >= virt) { iter--; + } return iter; }}; - auto eraseBlocksWithEndUnmapped{[&](auto unmappedEnd) { - auto blockStartPredecessor{walkBackToPredecessor(unmappedEnd)}; - auto blockStartSuccessor{std::next(blockStartPredecessor)}; + auto erase_blocks_with_end_unmapped{[&](auto unmappedEnd) { + auto block_start_predecessor{walk_back_to_predecessor(unmappedEnd)}; + auto block_start_successor{std::next(block_start_predecessor)}; auto eraseEnd{[&]() { - if (blockStartPredecessor->Unmapped()) { + if (block_start_predecessor->Unmapped()) { // If the start predecessor is unmapped then we can erase everything in our region // and be done return std::next(unmappedEnd); @@ -174,158 +188,171 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { // We can't have two unmapped regions after each other if (eraseEnd != blocks.end() && - (eraseEnd == blockStartSuccessor || - (blockStartPredecessor->Unmapped() && eraseEnd->Unmapped()))) + (eraseEnd == block_start_successor || + (block_start_predecessor->Unmapped() && eraseEnd->Unmapped()))) { UNREACHABLE_MSG("Multiple contiguous unmapped regions are unsupported!"); + } - blocks.erase(blockStartSuccessor, eraseEnd); + blocks.erase(block_start_successor, eraseEnd); }}; // We can avoid any splitting logic if these are the case - if (blockEndPredecessor->Unmapped()) { - if (blockEndPredecessor->virt > virt) - eraseBlocksWithEndUnmapped(blockEndPredecessor); + if (block_end_predecessor->Unmapped()) { + if (block_end_predecessor->virt > virt) { + erase_blocks_with_end_unmapped(block_end_predecessor); + } - if (unmapCallback) - unmapCallback(virt, size); + if (unmap_callback) { + unmap_callback(virt, size); + } return; // The region is unmapped, bail out early - } else if (blockEndSuccessor->virt == virtEnd && blockEndSuccessor->Unmapped()) { - eraseBlocksWithEndUnmapped(blockEndSuccessor); + } else if (block_end_successor->virt == virt_end && block_end_successor->Unmapped()) { + erase_blocks_with_end_unmapped(block_end_successor); - if (unmapCallback) - unmapCallback(virt, size); + if (unmap_callback) { + unmap_callback(virt, size); + } return; // The region is unmapped here and doesn't need splitting, bail out early - } else if (blockEndSuccessor == blocks.end()) { + } else if (block_end_successor == blocks.end()) { // This should never happen as the end should always follow an unmapped block UNREACHABLE_MSG("Unexpected Memory Manager state!"); - } else if (blockEndSuccessor->virt != virtEnd) { + } else if (block_end_successor->virt != virt_end) { // If one block is directly in front then we don't have to add a tail // The previous block is mapped so we will need to add a tail with an offset PaType tailPhys{[&]() { - if constexpr (PaContigSplit) - return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt; - else - return blockEndPredecessor->phys; + if constexpr (PaContigSplit) { + return block_end_predecessor->phys + virt_end - block_end_predecessor->virt; + } else { + return block_end_predecessor->phys; + } }()}; - if (blockEndPredecessor->virt >= virt) { + if (block_end_predecessor->virt >= virt) { // If this block's start would be overlapped by the unmap then reuse it as a tail block - blockEndPredecessor->virt = virtEnd; - blockEndPredecessor->phys = tailPhys; + block_end_predecessor->virt = virt_end; + block_end_predecessor->phys = tailPhys; // No longer predecessor anymore - blockEndSuccessor = blockEndPredecessor--; + block_end_successor = block_end_predecessor--; } else { - blocks.insert(blockEndSuccessor, + blocks.insert(block_end_successor, {Block(virt, UnmappedPa, {}), - Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)}); - if (unmapCallback) - unmapCallback(virt, size); + Block(virt_end, tailPhys, block_end_predecessor->extra_info)}); + if (unmap_callback) { + unmap_callback(virt, size); + } - return; // The previous block is mapped and ends before + // The previous block is mapped and ends before + return; } } // Walk the block vector to find the start predecessor as this is more efficient than another // binary search in most scenarios - auto blockStartPredecessor{walkBackToPredecessor(blockEndSuccessor)}; - auto blockStartSuccessor{std::next(blockStartPredecessor)}; + auto block_start_predecessor{walk_back_to_predecessor(block_end_successor)}; + auto block_start_successor{std::next(block_start_predecessor)}; - if (blockStartSuccessor->virt > virtEnd) { - UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt); - } else if (blockStartSuccessor->virt == virtEnd) { + if (block_start_successor->virt > virt_end) { + UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt); + } else if (block_start_successor->virt == virt_end) { // There are no blocks between the start and the end that would let us skip inserting a new // one for head // The previous block is may be unmapped, if so we don't need to insert any unmaps after it - if (blockStartPredecessor->Mapped()) - blocks.insert(blockStartSuccessor, Block(virt, UnmappedPa, {})); - } else if (blockStartPredecessor->Unmapped()) { + if (block_start_predecessor->Mapped()) { + blocks.insert(block_start_successor, Block(virt, UnmappedPa, {})); + } + } else if (block_start_predecessor->Unmapped()) { // If the previous block is unmapped - blocks.erase(blockStartSuccessor, blockEndPredecessor); + blocks.erase(block_start_successor, block_end_predecessor); } else { // Erase overwritten blocks, skipping the first one as we have written the unmapped start // block there - if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor) - blocks.erase(eraseStart, blockEndSuccessor); + if (auto eraseStart{std::next(block_start_successor)}; eraseStart != block_end_successor) { + blocks.erase(eraseStart, block_end_successor); + } // Add in the unmapped block header - blockStartSuccessor->virt = virt; - blockStartSuccessor->phys = UnmappedPa; + block_start_successor->virt = virt; + block_start_successor->phys = UnmappedPa; } - if (unmapCallback) - unmapCallback(virt, size); + if (unmap_callback) + unmap_callback(virt, size); } -ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart_, VaType vaLimit_) - : Base(vaLimit_), currentLinearAllocEnd(vaStart_), vaStart(vaStart_) {} +ALLOC_MEMBER_CONST()::FlatAllocator(VaType va_start_, VaType va_limit_) + : Base{va_limit_}, va_start{va_start_}, current_linear_alloc_end{va_start_} {} ALLOC_MEMBER(VaType)::Allocate(VaType size) { - std::scoped_lock lock(this->blockMutex); + std::scoped_lock lock(this->block_mutex); - VaType allocStart{UnmappedVa}; - VaType allocEnd{currentLinearAllocEnd + size}; + VaType alloc_start{UnmappedVa}; + VaType alloc_end{current_linear_alloc_end + size}; // Avoid searching backwards in the address space if possible - if (allocEnd >= currentLinearAllocEnd && allocEnd <= this->vaLimit) { - auto allocEndSuccessor{ - std::lower_bound(this->blocks.begin(), this->blocks.end(), allocEnd)}; - if (allocEndSuccessor == this->blocks.begin()) + if (alloc_end >= current_linear_alloc_end && alloc_end <= this->va_limit) { + auto alloc_end_successor{ + std::lower_bound(this->blocks.begin(), this->blocks.end(), alloc_end)}; + if (alloc_end_successor == this->blocks.begin()) { UNREACHABLE_MSG("First block in AS map is invalid!"); + } - auto allocEndPredecessor{std::prev(allocEndSuccessor)}; - if (allocEndPredecessor->virt <= currentLinearAllocEnd) { - allocStart = currentLinearAllocEnd; + auto alloc_end_predecessor{std::prev(alloc_end_successor)}; + if (alloc_end_predecessor->virt <= current_linear_alloc_end) { + alloc_start = current_linear_alloc_end; } else { // Skip over fixed any mappings in front of us - while (allocEndSuccessor != this->blocks.end()) { - if (allocEndSuccessor->virt - allocEndPredecessor->virt < size || - allocEndPredecessor->Mapped()) { - allocStart = allocEndPredecessor->virt; + while (alloc_end_successor != this->blocks.end()) { + if (alloc_end_successor->virt - alloc_end_predecessor->virt < size || + alloc_end_predecessor->Mapped()) { + alloc_start = alloc_end_predecessor->virt; break; } - allocEndPredecessor = allocEndSuccessor++; + alloc_end_predecessor = alloc_end_successor++; // Use the VA limit to calculate if we can fit in the final block since it has no // successor - if (allocEndSuccessor == this->blocks.end()) { - allocEnd = allocEndPredecessor->virt + size; + if (alloc_end_successor == this->blocks.end()) { + alloc_end = alloc_end_predecessor->virt + size; - if (allocEnd >= allocEndPredecessor->virt && allocEnd <= this->vaLimit) - allocStart = allocEndPredecessor->virt; + if (alloc_end >= alloc_end_predecessor->virt && alloc_end <= this->va_limit) { + alloc_start = alloc_end_predecessor->virt; + } } } } } - if (allocStart != UnmappedVa) { - currentLinearAllocEnd = allocStart + size; + if (alloc_start != UnmappedVa) { + current_linear_alloc_end = alloc_start + size; } else { // If linear allocation overflows the AS then find a gap - if (this->blocks.size() <= 2) + if (this->blocks.size() <= 2) { UNREACHABLE_MSG("Unexpected allocator state!"); + } - auto searchPredecessor{this->blocks.begin()}; - auto searchSuccessor{std::next(searchPredecessor)}; + auto search_predecessor{this->blocks.begin()}; + auto search_successor{std::next(search_predecessor)}; - while (searchSuccessor != this->blocks.end() && - (searchSuccessor->virt - searchPredecessor->virt < size || - searchPredecessor->Mapped())) { - searchPredecessor = searchSuccessor++; + while (search_successor != this->blocks.end() && + (search_successor->virt - search_predecessor->virt < size || + search_predecessor->Mapped())) { + search_predecessor = search_successor++; } - if (searchSuccessor != this->blocks.end()) - allocStart = searchPredecessor->virt; - else + if (search_successor != this->blocks.end()) { + alloc_start = search_predecessor->virt; + } else { return {}; // AS is full + } } - this->MapLocked(allocStart, true, size, {}); - return allocStart; + this->MapLocked(alloc_start, true, size, {}); + return alloc_start; } ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index b48f7fcaf..7a95f5305 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -472,16 +472,16 @@ void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) { params.regions = std::array{ VaRegion{ - .offset = vm.small_page_allocator->vaStart << VM::PAGE_SIZE_BITS, + .offset = vm.small_page_allocator->GetVAStart() << VM::PAGE_SIZE_BITS, .page_size = VM::YUZU_PAGESIZE, ._pad0_{}, - .pages = vm.small_page_allocator->vaLimit - vm.small_page_allocator->vaStart, + .pages = vm.small_page_allocator->GetVALimit() - vm.small_page_allocator->GetVAStart(), }, VaRegion{ - .offset = vm.big_page_allocator->vaStart << vm.big_page_size_bits, + .offset = vm.big_page_allocator->GetVAStart() << vm.big_page_size_bits, .page_size = vm.big_page_size, ._pad0_{}, - .pages = vm.big_page_allocator->vaLimit - vm.big_page_allocator->vaStart, + .pages = vm.big_page_allocator->GetVALimit() - vm.big_page_allocator->GetVAStart(), }, }; } -- cgit v1.2.3 From 11e1cbbdbde8269e7cdb0e150f25639223bdd3e6 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 29 Jun 2022 20:36:39 -0400 Subject: address_space: Rename va_start to virt_start Avoids conflicting with the va_start macro --- src/common/address_space.h | 6 +++--- src/common/address_space.inc | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/common') diff --git a/src/common/address_space.h b/src/common/address_space.h index bf649018c..9222b2fdc 100644 --- a/src/common/address_space.h +++ b/src/common/address_space.h @@ -116,7 +116,7 @@ private: using Base = FlatAddressSpaceMap; public: - explicit FlatAllocator(VaType va_start, VaType va_limit = Base::VaMaximum); + explicit FlatAllocator(VaType virt_start, VaType va_limit = Base::VaMaximum); /** * @brief Allocates a region in the AS of the given size and returns its address @@ -134,12 +134,12 @@ public: void Free(VaType virt, VaType size); VaType GetVAStart() const { - return va_start; + return virt_start; } private: /// The base VA of the allocator, no allocations will be below this - VaType va_start; + VaType virt_start; /** * The end address for the initial linear allocation pass diff --git a/src/common/address_space.inc b/src/common/address_space.inc index 3661b298e..9f957c81d 100644 --- a/src/common/address_space.inc +++ b/src/common/address_space.inc @@ -284,8 +284,8 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { unmap_callback(virt, size); } -ALLOC_MEMBER_CONST()::FlatAllocator(VaType va_start_, VaType va_limit_) - : Base{va_limit_}, va_start{va_start_}, current_linear_alloc_end{va_start_} {} +ALLOC_MEMBER_CONST()::FlatAllocator(VaType virt_start_, VaType va_limit_) + : Base{va_limit_}, virt_start{virt_start_}, current_linear_alloc_end{virt_start_} {} ALLOC_MEMBER(VaType)::Allocate(VaType size) { std::scoped_lock lock(this->block_mutex); -- cgit v1.2.3 From c80ed6d81fef5858508ac4b841defe8ee3a8663d Mon Sep 17 00:00:00 2001 From: Liam Date: Fri, 19 Aug 2022 21:58:25 -0400 Subject: general: rework usages of UNREACHABLE macro --- src/common/address_space.inc | 31 +++++++++++----------- .../hle/service/nvdrv/core/syncpoint_manager.cpp | 14 +++++----- .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 10 +++---- 3 files changed, 28 insertions(+), 27 deletions(-) (limited to 'src/common') diff --git a/src/common/address_space.inc b/src/common/address_space.inc index 9f957c81d..2195dabd5 100644 --- a/src/common/address_space.inc +++ b/src/common/address_space.inc @@ -34,7 +34,7 @@ MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType va_limit_, std::function unmap_callback_) : va_limit{va_limit_}, unmap_callback{std::move(unmap_callback_)} { if (va_limit > VaMaximum) { - UNREACHABLE_MSG("Invalid VA limit!"); + ASSERT_MSG(false, "Invalid VA limit!"); } } @@ -42,14 +42,14 @@ MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInf VaType virt_end{virt + size}; if (virt_end > va_limit) { - UNREACHABLE_MSG( - "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}", virt_end, - va_limit); + ASSERT_MSG(false, + "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}", + virt_end, va_limit); } auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)}; if (block_end_successor == blocks.begin()) { - UNREACHABLE_MSG("Trying to map a block before the VA start: virt_end: 0x{:X}", virt_end); + ASSERT_MSG(false, "Trying to map a block before the VA start: virt_end: 0x{:X}", virt_end); } auto block_end_predecessor{std::prev(block_end_successor)}; @@ -124,7 +124,7 @@ MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInf // Check that the start successor is either the end block or something in between if (block_start_successor->virt > virt_end) { - UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt); + ASSERT_MSG(false, "Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt); } else if (block_start_successor->virt == virt_end) { // We need to create a new block as there are none spare that we would overwrite blocks.insert(block_start_successor, Block(virt, phys, extra_info)); @@ -149,14 +149,15 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { VaType virt_end{virt + size}; if (virt_end > va_limit) { - UNREACHABLE_MSG( - "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}", virt_end, - va_limit); + ASSERT_MSG(false, + "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}", + virt_end, va_limit); } auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)}; if (block_end_successor == blocks.begin()) { - UNREACHABLE_MSG("Trying to unmap a block before the VA start: virt_end: 0x{:X}", virt_end); + ASSERT_MSG(false, "Trying to unmap a block before the VA start: virt_end: 0x{:X}", + virt_end); } auto block_end_predecessor{std::prev(block_end_successor)}; @@ -190,7 +191,7 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { if (eraseEnd != blocks.end() && (eraseEnd == block_start_successor || (block_start_predecessor->Unmapped() && eraseEnd->Unmapped()))) { - UNREACHABLE_MSG("Multiple contiguous unmapped regions are unsupported!"); + ASSERT_MSG(false, "Multiple contiguous unmapped regions are unsupported!"); } blocks.erase(block_start_successor, eraseEnd); @@ -217,7 +218,7 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { return; // The region is unmapped here and doesn't need splitting, bail out early } else if (block_end_successor == blocks.end()) { // This should never happen as the end should always follow an unmapped block - UNREACHABLE_MSG("Unexpected Memory Manager state!"); + ASSERT_MSG(false, "Unexpected Memory Manager state!"); } else if (block_end_successor->virt != virt_end) { // If one block is directly in front then we don't have to add a tail @@ -256,7 +257,7 @@ MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { auto block_start_successor{std::next(block_start_predecessor)}; if (block_start_successor->virt > virt_end) { - UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt); + ASSERT_MSG(false, "Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt); } else if (block_start_successor->virt == virt_end) { // There are no blocks between the start and the end that would let us skip inserting a new // one for head @@ -298,7 +299,7 @@ ALLOC_MEMBER(VaType)::Allocate(VaType size) { auto alloc_end_successor{ std::lower_bound(this->blocks.begin(), this->blocks.end(), alloc_end)}; if (alloc_end_successor == this->blocks.begin()) { - UNREACHABLE_MSG("First block in AS map is invalid!"); + ASSERT_MSG(false, "First block in AS map is invalid!"); } auto alloc_end_predecessor{std::prev(alloc_end_successor)}; @@ -332,7 +333,7 @@ ALLOC_MEMBER(VaType)::Allocate(VaType size) { current_linear_alloc_end = alloc_start + size; } else { // If linear allocation overflows the AS then find a gap if (this->blocks.size() <= 2) { - UNREACHABLE_MSG("Unexpected allocator state!"); + ASSERT_MSG(false, "Unexpected allocator state!"); } auto search_predecessor{this->blocks.begin()}; diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp index 0bb2aec97..072b3a22f 100644 --- a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp +++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp @@ -29,7 +29,7 @@ SyncpointManager::~SyncpointManager() = default; u32 SyncpointManager::ReserveSyncpoint(u32 id, bool clientManaged) { if (syncpoints.at(id).reserved) { - UNREACHABLE_MSG("Requested syncpoint is in use"); + ASSERT_MSG(false, "Requested syncpoint is in use"); return 0; } @@ -45,7 +45,7 @@ u32 SyncpointManager::FindFreeSyncpoint() { return i; } } - UNREACHABLE_MSG("Failed to find a free syncpoint!"); + ASSERT_MSG(false, "Failed to find a free syncpoint!"); return 0; } @@ -68,7 +68,7 @@ bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) { const SyncpointInfo& syncpoint{syncpoints.at(id)}; if (!syncpoint.reserved) { - UNREACHABLE(); + ASSERT(false); return 0; } @@ -83,7 +83,7 @@ bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) { u32 SyncpointManager::IncrementSyncpointMaxExt(u32 id, u32 amount) { if (!syncpoints.at(id).reserved) { - UNREACHABLE(); + ASSERT(false); return 0; } @@ -92,7 +92,7 @@ u32 SyncpointManager::IncrementSyncpointMaxExt(u32 id, u32 amount) { u32 SyncpointManager::ReadSyncpointMinValue(u32 id) { if (!syncpoints.at(id).reserved) { - UNREACHABLE(); + ASSERT(false); return 0; } @@ -101,7 +101,7 @@ u32 SyncpointManager::ReadSyncpointMinValue(u32 id) { u32 SyncpointManager::UpdateMin(u32 id) { if (!syncpoints.at(id).reserved) { - UNREACHABLE(); + ASSERT(false); return 0; } @@ -111,7 +111,7 @@ u32 SyncpointManager::UpdateMin(u32 id) { NvFence SyncpointManager::GetSyncpointFence(u32 id) { if (!syncpoints.at(id).reserved) { - UNREACHABLE(); + ASSERT(false); return NvFence{}; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 7a95f5305..192503ffc 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -96,7 +96,7 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector& input, std::vector& std::scoped_lock lock(mutex); if (vm.initialised) { - UNREACHABLE_MSG("Cannot initialise an address space twice!"); + ASSERT_MSG(false, "Cannot initialise an address space twice!"); return NvResult::InvalidState; } @@ -174,7 +174,7 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector& input, std::vector< } else { params.offset = static_cast(allocator.Allocate(params.pages)) << page_size_bits; if (!params.offset) { - UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!"); + ASSERT_MSG(false, "Failed to allocate free space in the GPU AS!"); return NvResult::InsufficientMemory; } } @@ -372,7 +372,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector& input, std::vectoralign, VM::YUZU_PAGESIZE)) return false; else { - UNREACHABLE(); + ASSERT(false); return false; } }()}; @@ -382,7 +382,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector& input, std::vectorfirst) + size > alloc->second.size) { - UNREACHABLE_MSG("Cannot perform a fixed mapping into an unallocated region!"); + ASSERT_MSG(false, "Cannot perform a fixed mapping into an unallocated region!"); return NvResult::BadValue; } @@ -403,7 +403,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector& input, std::vector(Common::AlignUp(size, page_size) >> page_size_bits))) << page_size_bits; if (!params.offset) { - UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!"); + ASSERT_MSG(false, "Failed to allocate free space in the GPU AS!"); return NvResult::InsufficientMemory; } -- cgit v1.2.3 From ca3db0d7c94a20668781830ff852dbf512598efb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 1 Sep 2022 05:45:22 +0200 Subject: General: address feedback --- src/common/multi_level_page_table.h | 8 +-- src/core/hle/service/nvdrv/core/container.cpp | 12 ++++- src/core/hle/service/nvdrv/core/container.h | 23 ++++++--- src/core/hle/service/nvdrv/core/nvmap.cpp | 4 +- src/core/hle/service/nvdrv/core/nvmap.h | 59 +++++++++++----------- .../hle/service/nvdrv/core/syncpoint_manager.cpp | 32 ++++++------ .../hle/service/nvdrv/core/syncpoint_manager.h | 34 ++++++------- .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 28 +++++----- src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | 2 +- src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 2 +- .../hle/service/nvdrv/devices/nvhost_nvdec.cpp | 13 ++--- src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | 3 -- .../service/nvdrv/devices/nvhost_nvdec_common.cpp | 13 ++--- .../service/nvdrv/devices/nvhost_nvdec_common.h | 5 -- src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 16 +++--- src/core/hle/service/nvdrv/devices/nvhost_vic.h | 3 -- src/core/hle/service/nvdrv/devices/nvmap.h | 4 +- src/core/hle/service/nvdrv/nvdrv.cpp | 4 +- src/core/hle/service/nvdrv/nvdrv.h | 2 +- src/video_core/control/channel_state.cpp | 7 +-- src/video_core/control/channel_state.h | 4 +- src/video_core/control/channel_state_cache.h | 2 +- src/video_core/control/scheduler.cpp | 6 ++- src/video_core/control/scheduler.h | 2 +- src/video_core/engines/maxwell_dma.h | 2 +- src/video_core/gpu.cpp | 2 +- src/video_core/host1x/host1x.h | 2 +- src/video_core/host1x/syncpoint_manager.cpp | 12 ++--- src/video_core/host1x/syncpoint_manager.h | 24 ++++----- src/video_core/memory_manager.h | 2 +- 30 files changed, 167 insertions(+), 165 deletions(-) (limited to 'src/common') diff --git a/src/common/multi_level_page_table.h b/src/common/multi_level_page_table.h index 08092c89a..31f6676a0 100644 --- a/src/common/multi_level_page_table.h +++ b/src/common/multi_level_page_table.h @@ -46,19 +46,19 @@ public: void ReserveRange(u64 start, std::size_t size); - [[nodiscard]] constexpr const BaseAddr& operator[](std::size_t index) const { + [[nodiscard]] const BaseAddr& operator[](std::size_t index) const { return base_ptr[index]; } - [[nodiscard]] constexpr BaseAddr& operator[](std::size_t index) { + [[nodiscard]] BaseAddr& operator[](std::size_t index) { return base_ptr[index]; } - [[nodiscard]] constexpr BaseAddr* data() { + [[nodiscard]] BaseAddr* data() { return base_ptr; } - [[nodiscard]] constexpr const BaseAddr* data() const { + [[nodiscard]] const BaseAddr* data() const { return base_ptr; } diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp index d2a632646..37ca24f5d 100644 --- a/src/core/hle/service/nvdrv/core/container.cpp +++ b/src/core/hle/service/nvdrv/core/container.cpp @@ -10,9 +10,11 @@ namespace Service::Nvidia::NvCore { struct ContainerImpl { - ContainerImpl(Tegra::Host1x::Host1x& host1x_) : file{host1x_}, manager{host1x_} {} + explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_) + : file{host1x_}, manager{host1x_}, device_file_data{} {} NvMap file; SyncpointManager manager; + Container::Host1xDeviceFileData device_file_data; }; Container::Container(Tegra::Host1x::Host1x& host1x_) { @@ -29,6 +31,14 @@ const NvMap& Container::GetNvMapFile() const { return impl->file; } +Container::Host1xDeviceFileData& Container::Host1xDeviceFile() { + return impl->device_file_data; +} + +const Container::Host1xDeviceFileData& Container::Host1xDeviceFile() const { + return impl->device_file_data; +} + SyncpointManager& Container::GetSyncpointManager() { return impl->manager; } diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h index 5c8b95803..b4b63ac90 100644 --- a/src/core/hle/service/nvdrv/core/container.h +++ b/src/core/hle/service/nvdrv/core/container.h @@ -4,15 +4,15 @@ #pragma once +#include #include +#include -namespace Tegra { +#include "core/hle/service/nvdrv/nvdata.h" -namespace Host1x { +namespace Tegra::Host1x { class Host1x; -} // namespace Host1x - -} // namespace Tegra +} // namespace Tegra::Host1x namespace Service::Nvidia::NvCore { @@ -23,7 +23,7 @@ struct ContainerImpl; class Container { public: - Container(Tegra::Host1x::Host1x& host1x); + explicit Container(Tegra::Host1x::Host1x& host1x); ~Container(); NvMap& GetNvMapFile(); @@ -34,6 +34,17 @@ public: const SyncpointManager& GetSyncpointManager() const; + struct Host1xDeviceFileData { + std::unordered_map fd_to_id{}; + std::deque syncpts_accumulated{}; + u32 nvdec_next_id{}; + u32 vic_next_id{}; + }; + + Host1xDeviceFileData& Host1xDeviceFile(); + + const Host1xDeviceFileData& Host1xDeviceFile() const; + private: std::unique_ptr impl; }; diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp index e63ec7717..fbd8a74a5 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.cpp +++ b/src/core/hle/service/nvdrv/core/nvmap.cpp @@ -119,7 +119,7 @@ std::shared_ptr NvMap::GetHandle(Handle::Id handle) { std::scoped_lock lock(handles_lock); try { return handles.at(handle); - } catch ([[maybe_unused]] std::out_of_range& e) { + } catch (std::out_of_range&) { return nullptr; } } @@ -128,7 +128,7 @@ VAddr NvMap::GetHandleAddress(Handle::Id handle) { std::scoped_lock lock(handles_lock); try { return handles.at(handle)->address; - } catch ([[maybe_unused]] std::out_of_range& e) { + } catch (std::out_of_range&) { return 0; } } diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h index 6d6dac023..b9dd3801f 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.h +++ b/src/core/hle/service/nvdrv/core/nvmap.h @@ -98,35 +98,6 @@ public: } }; -private: - std::list> unmap_queue{}; - std::mutex unmap_queue_lock{}; //!< Protects access to `unmap_queue` - - std::unordered_map> - handles{}; //!< Main owning map of handles - std::mutex handles_lock; //!< Protects access to `handles` - - static constexpr u32 HandleIdIncrement{ - 4}; //!< Each new handle ID is an increment of 4 from the previous - std::atomic next_handle_id{HandleIdIncrement}; - Tegra::Host1x::Host1x& host1x; - - void AddHandle(std::shared_ptr handle); - - /** - * @brief Unmaps and frees the SMMU memory region a handle is mapped to - * @note Both `unmap_queue_lock` and `handle_description.mutex` MUST be locked when calling this - */ - void UnmapHandle(Handle& handle_description); - - /** - * @brief Removes a handle from the map taking its dupes into account - * @note handle_description.mutex MUST be locked when calling this - * @return If the handle was removed from the map - */ - bool TryRemoveHandle(const Handle& handle_description); - -public: /** * @brief Encapsulates the result of a FreeHandle operation */ @@ -136,7 +107,7 @@ public: bool was_uncached; //!< If the handle was allocated as uncached }; - NvMap(Tegra::Host1x::Host1x& host1x); + explicit NvMap(Tegra::Host1x::Host1x& host1x); /** * @brief Creates an unallocated handle of the given size @@ -172,5 +143,33 @@ public: * describing the prior state of the handle */ std::optional FreeHandle(Handle::Id handle, bool internal_session); + +private: + std::list> unmap_queue{}; + std::mutex unmap_queue_lock{}; //!< Protects access to `unmap_queue` + + std::unordered_map> + handles{}; //!< Main owning map of handles + std::mutex handles_lock; //!< Protects access to `handles` + + static constexpr u32 HandleIdIncrement{ + 4}; //!< Each new handle ID is an increment of 4 from the previous + std::atomic next_handle_id{HandleIdIncrement}; + Tegra::Host1x::Host1x& host1x; + + void AddHandle(std::shared_ptr handle); + + /** + * @brief Unmaps and frees the SMMU memory region a handle is mapped to + * @note Both `unmap_queue_lock` and `handle_description.mutex` MUST be locked when calling this + */ + void UnmapHandle(Handle& handle_description); + + /** + * @brief Removes a handle from the map taking its dupes into account + * @note handle_description.mutex MUST be locked when calling this + * @return If the handle was removed from the map + */ + bool TryRemoveHandle(const Handle& handle_description); }; } // namespace Service::Nvidia::NvCore diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp index 072b3a22f..eda2041a0 100644 --- a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp +++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp @@ -18,23 +18,23 @@ SyncpointManager::SyncpointManager(Tegra::Host1x::Host1x& host1x_) : host1x{host ReserveSyncpoint(VBlank0SyncpointId, true); ReserveSyncpoint(VBlank1SyncpointId, true); - for (u32 syncpointId : channel_syncpoints) { - if (syncpointId) { - ReserveSyncpoint(syncpointId, false); + for (u32 syncpoint_id : channel_syncpoints) { + if (syncpoint_id) { + ReserveSyncpoint(syncpoint_id, false); } } } SyncpointManager::~SyncpointManager() = default; -u32 SyncpointManager::ReserveSyncpoint(u32 id, bool clientManaged) { +u32 SyncpointManager::ReserveSyncpoint(u32 id, bool client_managed) { if (syncpoints.at(id).reserved) { ASSERT_MSG(false, "Requested syncpoint is in use"); return 0; } syncpoints.at(id).reserved = true; - syncpoints.at(id).interfaceManaged = clientManaged; + syncpoints.at(id).interface_managed = client_managed; return id; } @@ -49,9 +49,9 @@ u32 SyncpointManager::FindFreeSyncpoint() { return 0; } -u32 SyncpointManager::AllocateSyncpoint(bool clientManaged) { +u32 SyncpointManager::AllocateSyncpoint(bool client_managed) { std::lock_guard lock(reservation_lock); - return ReserveSyncpoint(FindFreeSyncpoint(), clientManaged); + return ReserveSyncpoint(FindFreeSyncpoint(), client_managed); } void SyncpointManager::FreeSyncpoint(u32 id) { @@ -64,7 +64,7 @@ bool SyncpointManager::IsSyncpointAllocated(u32 id) { return (id <= SyncpointCount) && syncpoints[id].reserved; } -bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) { +bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) const { const SyncpointInfo& syncpoint{syncpoints.at(id)}; if (!syncpoint.reserved) { @@ -74,10 +74,10 @@ bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) { // If the interface manages counters then we don't keep track of the maximum value as it handles // sanity checking the values then - if (syncpoint.interfaceManaged) { - return static_cast(syncpoint.counterMin - threshold) >= 0; + if (syncpoint.interface_managed) { + return static_cast(syncpoint.counter_min - threshold) >= 0; } else { - return (syncpoint.counterMax - threshold) >= (syncpoint.counterMin - threshold); + return (syncpoint.counter_max - threshold) >= (syncpoint.counter_min - threshold); } } @@ -87,7 +87,7 @@ u32 SyncpointManager::IncrementSyncpointMaxExt(u32 id, u32 amount) { return 0; } - return syncpoints.at(id).counterMax += amount; + return syncpoints.at(id).counter_max += amount; } u32 SyncpointManager::ReadSyncpointMinValue(u32 id) { @@ -96,7 +96,7 @@ u32 SyncpointManager::ReadSyncpointMinValue(u32 id) { return 0; } - return syncpoints.at(id).counterMin; + return syncpoints.at(id).counter_min; } u32 SyncpointManager::UpdateMin(u32 id) { @@ -105,8 +105,8 @@ u32 SyncpointManager::UpdateMin(u32 id) { return 0; } - syncpoints.at(id).counterMin = host1x.GetSyncpointManager().GetHostSyncpointValue(id); - return syncpoints.at(id).counterMin; + syncpoints.at(id).counter_min = host1x.GetSyncpointManager().GetHostSyncpointValue(id); + return syncpoints.at(id).counter_min; } NvFence SyncpointManager::GetSyncpointFence(u32 id) { @@ -115,7 +115,7 @@ NvFence SyncpointManager::GetSyncpointFence(u32 id) { return NvFence{}; } - return {.id = static_cast(id), .value = syncpoints.at(id).counterMax}; + return {.id = static_cast(id), .value = syncpoints.at(id).counter_max}; } } // namespace Service::Nvidia::NvCore diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.h b/src/core/hle/service/nvdrv/core/syncpoint_manager.h index 6b71cd33d..b76ef9032 100644 --- a/src/core/hle/service/nvdrv/core/syncpoint_manager.h +++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.h @@ -11,13 +11,9 @@ #include "common/common_types.h" #include "core/hle/service/nvdrv/nvdata.h" -namespace Tegra { - -namespace Host1x { +namespace Tegra::Host1x { class Host1x; -} // namespace Host1x - -} // namespace Tegra +} // namespace Tegra::Host1x namespace Service::Nvidia::NvCore { @@ -54,15 +50,15 @@ public: * @brief Finds a free syncpoint and reserves it * @return The ID of the reserved syncpoint */ - u32 AllocateSyncpoint(bool clientManaged); + u32 AllocateSyncpoint(bool client_managed); /** * @url * https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/8f74a72394efb871cb3f886a3de2998cd7ff2990/drivers/gpu/host1x/syncpt.c#L259 */ - bool HasSyncpointExpired(u32 id, u32 threshold); + bool HasSyncpointExpired(u32 id, u32 threshold) const; - bool IsFenceSignalled(NvFence fence) { + bool IsFenceSignalled(NvFence fence) const { return HasSyncpointExpired(fence.id, fence.value); } @@ -107,7 +103,7 @@ private: /** * @note reservation_lock should be locked when calling this */ - u32 ReserveSyncpoint(u32 id, bool clientManaged); + u32 ReserveSyncpoint(u32 id, bool client_managed); /** * @return The ID of the first free syncpoint @@ -115,15 +111,15 @@ private: u32 FindFreeSyncpoint(); struct SyncpointInfo { - std::atomic counterMin; //!< The least value the syncpoint can be (The value it was - //!< when it was last synchronized with host1x) - std::atomic counterMax; //!< The maximum value the syncpoint can reach according to the - //!< current usage - bool interfaceManaged; //!< If the syncpoint is managed by a host1x client interface, a - //!< client interface is a HW block that can handle host1x - //!< transactions on behalf of a host1x client (Which would otherwise - //!< need to be manually synced using PIO which is synchronous and - //!< requires direct cooperation of the CPU) + std::atomic counter_min; //!< The least value the syncpoint can be (The value it was + //!< when it was last synchronized with host1x) + std::atomic counter_max; //!< The maximum value the syncpoint can reach according to + //!< the current usage + bool interface_managed; //!< If the syncpoint is managed by a host1x client interface, a + //!< client interface is a HW block that can handle host1x + //!< transactions on behalf of a host1x client (Which would + //!< otherwise need to be manually synced using PIO which is + //!< synchronous and requires direct cooperation of the CPU) bool reserved; //!< If the syncpoint is reserved or not, not to be confused with a reserved //!< value }; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 192503ffc..6411dbf43 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -106,7 +106,7 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector& input, std::vector& return NvResult::BadValue; } - if (!(params.big_page_size & VM::SUPPORTED_BIG_PAGE_SIZES)) { + if ((params.big_page_size & VM::SUPPORTED_BIG_PAGE_SIZES) == 0) { LOG_ERROR(Service_NVDRV, "Unsupported big page size: 0x{:X}!", params.big_page_size); return NvResult::BadValue; } @@ -124,12 +124,13 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector& input, std::vector& vm.va_range_end = params.va_range_end; } - const u64 start_pages{vm.va_range_start >> VM::PAGE_SIZE_BITS}; - const u64 end_pages{vm.va_range_split >> VM::PAGE_SIZE_BITS}; + const auto start_pages{static_cast(vm.va_range_start >> VM::PAGE_SIZE_BITS)}; + const auto end_pages{static_cast(vm.va_range_split >> VM::PAGE_SIZE_BITS)}; vm.small_page_allocator = std::make_shared(start_pages, end_pages); - const u64 start_big_pages{vm.va_range_split >> vm.big_page_size_bits}; - const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits}; + const auto start_big_pages{static_cast(vm.va_range_split >> vm.big_page_size_bits)}; + const auto end_big_pages{ + static_cast((vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits)}; vm.big_page_allocator = std::make_unique(start_big_pages, end_big_pages); gmmu = std::make_shared(system, 40, vm.big_page_size_bits, @@ -210,10 +211,11 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) { // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state // Only FreeSpace can unmap them fully - if (mapping->sparse_alloc) + if (mapping->sparse_alloc) { gmmu->MapSparse(offset, mapping->size, mapping->big_page); - else + } else { gmmu->Unmap(offset, mapping->size); + } mapping_map.erase(offset); } @@ -256,7 +258,7 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector& input, std::vector& allocator.Free(static_cast(params.offset >> page_size_bits), static_cast(allocation.size >> page_size_bits)); allocation_map.erase(params.offset); - } catch ([[maybe_unused]] const std::out_of_range& e) { + } catch (const std::out_of_range&) { return NvResult::BadValue; } @@ -351,7 +353,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector& input, std::vectorMap(gpu_address, cpu_address, params.mapping_size, mapping->big_page); return NvResult::Success; - } catch ([[maybe_unused]] const std::out_of_range& e) { + } catch (const std::out_of_range&) { LOG_WARNING(Service_NVDRV, "Cannot remap an unmapped GPU address space region: 0x{:X}", params.offset); return NvResult::BadValue; @@ -367,11 +369,11 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector& input, std::vectororig_size}; bool big_page{[&]() { - if (Common::IsAligned(handle->align, vm.big_page_size)) + if (Common::IsAligned(handle->align, vm.big_page_size)) { return true; - else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE)) + } else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE)) { return false; - else { + } else { ASSERT(false); return false; } @@ -450,7 +452,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector& input, std::vector& input, std::vectorinitiated) { + if (channel_state->initialized) { LOG_CRITICAL(Service_NVDRV, "Already allocated!"); return NvResult::AlreadyAllocated; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index fed537039..1703f9cc3 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -5,13 +5,12 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/hle/service/nvdrv/core/container.h" #include "core/hle/service/nvdrv/devices/nvhost_nvdec.h" #include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { -u32 nvhost_nvdec::next_id{}; - nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core_) : nvhost_nvdec_common{system_, core_, NvCore::ChannelType::NvDec} {} nvhost_nvdec::~nvhost_nvdec() = default; @@ -22,8 +21,9 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector& case 0x0: switch (command.cmd) { case 0x1: { - if (!fd_to_id.contains(fd)) { - fd_to_id[fd] = next_id++; + auto& host1x_file = core.Host1xDeviceFile(); + if (!host1x_file.fd_to_id.contains(fd)) { + host1x_file.fd_to_id[fd] = host1x_file.nvdec_next_id++; } return Submit(fd, input, output); } @@ -74,8 +74,9 @@ void nvhost_nvdec::OnOpen(DeviceFD fd) { void nvhost_nvdec::OnClose(DeviceFD fd) { LOG_INFO(Service_NVDRV, "NVDEC video stream ended"); - const auto iter = fd_to_id.find(fd); - if (iter != fd_to_id.end()) { + auto& host1x_file = core.Host1xDeviceFile(); + const auto iter = host1x_file.fd_to_id.find(fd); + if (iter != host1x_file.fd_to_id.end()) { system.GPU().ClearCdmaInstance(iter->second); } system.AudioCore().SetNVDECActive(false); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 3261ce1d4..c1b4e53e8 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -22,9 +22,6 @@ public: void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; - -private: - static u32 next_id; }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 2ec1ad3e9..99eede702 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -46,13 +46,11 @@ std::size_t WriteVectors(std::vector& dst, const std::vector& src, std::s } } // Anonymous namespace -std::unordered_map nvhost_nvdec_common::fd_to_id{}; -std::deque nvhost_nvdec_common::syncpts_accumulated{}; - nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_, NvCore::ChannelType channel_type_) : nvdevice{system_}, core{core_}, syncpoint_manager{core.GetSyncpointManager()}, nvmap{core.GetNvMapFile()}, channel_type{channel_type_} { + auto& syncpts_accumulated = core.Host1xDeviceFile().syncpts_accumulated; if (syncpts_accumulated.empty()) { channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false); } else { @@ -60,8 +58,9 @@ nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Containe syncpts_accumulated.pop_front(); } } + nvhost_nvdec_common::~nvhost_nvdec_common() { - syncpts_accumulated.push_back(channel_syncpoint); + core.Host1xDeviceFile().syncpts_accumulated.push_back(channel_syncpoint); } NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector& input) { @@ -108,7 +107,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector& input, Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); system.Memory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), cmdlist.size() * sizeof(u32)); - gpu.PushCommandBuffer(fd_to_id[fd], cmdlist); + gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); } std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit)); // Some games expect command_buffers to be written back @@ -186,8 +185,4 @@ Kernel::KEvent* nvhost_nvdec_common::QueryEvent(u32 event_id) { return nullptr; } -void nvhost_nvdec_common::Reset() { - fd_to_id.clear(); -} - } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index 93990bb9b..fe76100c8 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -25,8 +25,6 @@ public: NvCore::ChannelType channel_type); ~nvhost_nvdec_common() override; - static void Reset(); - protected: struct IoctlSetNvmapFD { s32_le nvmap_fd{}; @@ -119,7 +117,6 @@ protected: Kernel::KEvent* QueryEvent(u32 event_id) override; - static std::unordered_map fd_to_id; u32 channel_syncpoint; s32_le nvmap_fd{}; u32_le submit_timeout{}; @@ -128,8 +125,6 @@ protected: NvCore::NvMap& nvmap; NvCore::ChannelType channel_type; std::array device_syncpoints{}; - - static std::deque syncpts_accumulated; }; }; // namespace Devices } // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 2e4ff988c..73f97136e 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -4,13 +4,12 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/hle/service/nvdrv/core/container.h" #include "core/hle/service/nvdrv/devices/nvhost_vic.h" #include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { -u32 nvhost_vic::next_id{}; - nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core_) : nvhost_nvdec_common{system_, core_, NvCore::ChannelType::VIC} {} @@ -21,11 +20,13 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, const std::vector& i switch (command.group) { case 0x0: switch (command.cmd) { - case 0x1: - if (!fd_to_id.contains(fd)) { - fd_to_id[fd] = next_id++; + case 0x1: { + auto& host1x_file = core.Host1xDeviceFile(); + if (!host1x_file.fd_to_id.contains(fd)) { + host1x_file.fd_to_id[fd] = host1x_file.vic_next_id++; } return Submit(fd, input, output); + } case 0x2: return GetSyncpoint(input, output); case 0x3: @@ -69,8 +70,9 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, const std::vector& i void nvhost_vic::OnOpen(DeviceFD fd) {} void nvhost_vic::OnClose(DeviceFD fd) { - const auto iter = fd_to_id.find(fd); - if (iter != fd_to_id.end()) { + auto& host1x_file = core.Host1xDeviceFile(); + const auto iter = host1x_file.fd_to_id.find(fd); + if (iter != host1x_file.fd_to_id.end()) { system.GPU().ClearCdmaInstance(iter->second); } } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index 59e23b41e..f164caafb 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -21,8 +21,5 @@ public: void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; - -private: - static u32 next_id; }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index 52e1d7cff..e9bfd0358 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h @@ -23,8 +23,8 @@ public: explicit nvmap(Core::System& system_, NvCore::Container& container); ~nvmap() override; - nvmap(nvmap const&) = delete; - nvmap& operator=(nvmap const&) = delete; + nvmap(const nvmap&) = delete; + nvmap& operator=(const nvmap&) = delete; NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector& input, std::vector& output) override; diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 7929443d2..5e7b7468f 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -101,9 +101,7 @@ Module::Module(Core::System& system) }; } -Module::~Module() { - Devices::nvhost_nvdec_common::Reset(); -} +Module::~Module() {} NvResult Module::VerifyFD(DeviceFD fd) const { if (fd < 0) { diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index a2aeb80b4..146d046a9 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h @@ -46,7 +46,7 @@ class Module; class EventInterface { public: - EventInterface(Module& module_); + explicit EventInterface(Module& module_); ~EventInterface(); Kernel::KEvent* CreateEvent(std::string name); diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp index b04922ac0..cdecc3a91 100644 --- a/src/video_core/control/channel_state.cpp +++ b/src/video_core/control/channel_state.cpp @@ -14,10 +14,7 @@ namespace Tegra::Control { -ChannelState::ChannelState(s32 bind_id_) { - bind_id = bind_id_; - initiated = false; -} +ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {} void ChannelState::Init(Core::System& system, GPU& gpu) { ASSERT(memory_manager); @@ -27,7 +24,7 @@ void ChannelState::Init(Core::System& system, GPU& gpu) { kepler_compute = std::make_unique(system, *memory_manager); maxwell_dma = std::make_unique(system, *memory_manager); kepler_memory = std::make_unique(system, *memory_manager); - initiated = true; + initialized = true; } void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) { diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h index 305b21cba..3a7b9872c 100644 --- a/src/video_core/control/channel_state.h +++ b/src/video_core/control/channel_state.h @@ -34,7 +34,7 @@ class DmaPusher; namespace Control { struct ChannelState { - ChannelState(s32 bind_id); + explicit ChannelState(s32 bind_id); ChannelState(const ChannelState& state) = delete; ChannelState& operator=(const ChannelState&) = delete; ChannelState(ChannelState&& other) noexcept = default; @@ -60,7 +60,7 @@ struct ChannelState { std::unique_ptr dma_pusher; - bool initiated{}; + bool initialized{}; }; } // namespace Control diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h index 5246192a8..584a0c26c 100644 --- a/src/video_core/control/channel_state_cache.h +++ b/src/video_core/control/channel_state_cache.h @@ -32,7 +32,7 @@ namespace VideoCommon { class ChannelInfo { public: ChannelInfo() = delete; - ChannelInfo(Tegra::Control::ChannelState& state); + explicit ChannelInfo(Tegra::Control::ChannelState& state); ChannelInfo(const ChannelInfo& state) = delete; ChannelInfo& operator=(const ChannelInfo&) = delete; ChannelInfo(ChannelInfo&& other) = default; diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp index 733042690..f7cbe204e 100644 --- a/src/video_core/control/scheduler.cpp +++ b/src/video_core/control/scheduler.cpp @@ -3,6 +3,7 @@ #include +#include "common/assert.h" #include "video_core/control/channel_state.h" #include "video_core/control/scheduler.h" #include "video_core/gpu.h" @@ -13,8 +14,9 @@ Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {} Scheduler::~Scheduler() = default; void Scheduler::Push(s32 channel, CommandList&& entries) { - std::unique_lock lk(scheduling_guard); + std::unique_lock lk(scheduling_guard); auto it = channels.find(channel); + ASSERT(it != channels.end()); auto channel_state = it->second; gpu.BindChannel(channel_state->bind_id); channel_state->dma_pusher->Push(std::move(entries)); @@ -23,7 +25,7 @@ void Scheduler::Push(s32 channel, CommandList&& entries) { void Scheduler::DeclareChannel(std::shared_ptr new_channel) { s32 channel = new_channel->bind_id; - std::unique_lock lk(scheduling_guard); + std::unique_lock lk(scheduling_guard); channels.emplace(channel, new_channel); } diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h index 305a01e0a..44addf61c 100644 --- a/src/video_core/control/scheduler.h +++ b/src/video_core/control/scheduler.h @@ -19,7 +19,7 @@ struct ChannelState; class Scheduler { public: - Scheduler(GPU& gpu_); + explicit Scheduler(GPU& gpu_); ~Scheduler(); void Push(s32 channel, CommandList&& entries); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 9c5d567a6..bc48320ce 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -195,7 +195,7 @@ public: BitField<24, 2, u32> num_dst_components_minus_one; }; - Swizzle GetComponent(size_t i) { + Swizzle GetComponent(size_t i) const { const u32 raw = dst_components_raw; return static_cast((raw >> (i * 3)) & 0x7); } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index d7a3dd96b..28b38273e 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -355,7 +355,7 @@ struct GPU::Impl { std::condition_variable sync_cv; - std::list> sync_requests; + std::list> sync_requests; std::atomic current_sync_fence{}; u64 last_sync_fence{}; std::mutex sync_request_mutex; diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h index 7ecf853d9..57082ae54 100644 --- a/src/video_core/host1x/host1x.h +++ b/src/video_core/host1x/host1x.h @@ -19,7 +19,7 @@ namespace Host1x { class Host1x { public: - Host1x(Core::System& system); + explicit Host1x(Core::System& system); SyncpointManager& GetSyncpointManager() { return syncpoint_manager; diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp index 4471bacae..326e8355a 100644 --- a/src/video_core/host1x/syncpoint_manager.cpp +++ b/src/video_core/host1x/syncpoint_manager.cpp @@ -12,13 +12,13 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); SyncpointManager::ActionHandle SyncpointManager::RegisterAction( std::atomic& syncpoint, std::list& action_storage, u32 expected_value, - std::function& action) { + std::function&& action) { if (syncpoint.load(std::memory_order_acquire) >= expected_value) { action(); return {}; } - std::unique_lock lk(guard); + std::unique_lock lk(guard); if (syncpoint.load(std::memory_order_relaxed) >= expected_value) { action(); return {}; @@ -30,12 +30,12 @@ SyncpointManager::ActionHandle SyncpointManager::RegisterAction( } ++it; } - return action_storage.emplace(it, expected_value, action); + return action_storage.emplace(it, expected_value, std::move(action)); } void SyncpointManager::DeregisterAction(std::list& action_storage, ActionHandle& handle) { - std::unique_lock lk(guard); + std::unique_lock lk(guard); action_storage.erase(handle); } @@ -68,7 +68,7 @@ void SyncpointManager::Increment(std::atomic& syncpoint, std::condition_var std::list& action_storage) { auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1}; - std::unique_lock lk(guard); + std::unique_lock lk(guard); auto it = action_storage.begin(); while (it != action_storage.end()) { if (it->expected_value > new_value) { @@ -87,7 +87,7 @@ void SyncpointManager::Wait(std::atomic& syncpoint, std::condition_variable return; } - std::unique_lock lk(guard); + std::unique_lock lk(guard); wait_cv.wait(lk, pred); } diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h index 72220a09a..50a264e23 100644 --- a/src/video_core/host1x/syncpoint_manager.h +++ b/src/video_core/host1x/syncpoint_manager.h @@ -18,34 +18,34 @@ namespace Host1x { class SyncpointManager { public: - u32 GetGuestSyncpointValue(u32 id) { + u32 GetGuestSyncpointValue(u32 id) const { return syncpoints_guest[id].load(std::memory_order_acquire); } - u32 GetHostSyncpointValue(u32 id) { + u32 GetHostSyncpointValue(u32 id) const { return syncpoints_host[id].load(std::memory_order_acquire); } struct RegisteredAction { - RegisteredAction(u32 expected_value_, std::function& action_) - : expected_value{expected_value_}, action{action_} {} + explicit RegisteredAction(u32 expected_value_, std::function&& action_) + : expected_value{expected_value_}, action{std::move(action_)} {} u32 expected_value; - std::function action; + std::function action; }; using ActionHandle = std::list::iterator; template ActionHandle RegisterGuestAction(u32 syncpoint_id, u32 expected_value, Func&& action) { - std::function func(action); + std::function func(action); return RegisterAction(syncpoints_guest[syncpoint_id], guest_action_storage[syncpoint_id], - expected_value, func); + expected_value, std::move(func)); } template ActionHandle RegisterHostAction(u32 syncpoint_id, u32 expected_value, Func&& action) { - std::function func(action); + std::function func(action); return RegisterAction(syncpoints_host[syncpoint_id], host_action_storage[syncpoint_id], - expected_value, func); + expected_value, std::move(func)); } void DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle); @@ -60,11 +60,11 @@ public: void WaitHost(u32 syncpoint_id, u32 expected_value); - bool IsReadyGuest(u32 syncpoint_id, u32 expected_value) { + bool IsReadyGuest(u32 syncpoint_id, u32 expected_value) const { return syncpoints_guest[syncpoint_id].load(std::memory_order_acquire) >= expected_value; } - bool IsReadyHost(u32 syncpoint_id, u32 expected_value) { + bool IsReadyHost(u32 syncpoint_id, u32 expected_value) const { return syncpoints_host[syncpoint_id].load(std::memory_order_acquire) >= expected_value; } @@ -74,7 +74,7 @@ private: ActionHandle RegisterAction(std::atomic& syncpoint, std::list& action_storage, u32 expected_value, - std::function& action); + std::function&& action); void DeregisterAction(std::list& action_storage, ActionHandle& handle); diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index ae4fd98df..f992e29f3 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -126,7 +126,7 @@ private: void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); template - [[nodiscard]] inline std::size_t PageEntryIndex(GPUVAddr gpu_addr) const { + [[nodiscard]] std::size_t PageEntryIndex(GPUVAddr gpu_addr) const { if constexpr (is_big_page) { return (gpu_addr >> big_page_bits) & big_page_table_mask; } else { -- cgit v1.2.3