From 6c7eb81f7d871f5c08a4844471633a67725aae73 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 4 Jan 2023 22:05:20 -0500 Subject: video_core: Cache GPU internal writes. --- src/video_core/memory_manager.cpp | 62 ++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 14 deletions(-) (limited to 'src/video_core/memory_manager.cpp') diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 3a5cdeb39..83924475b 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -11,6 +11,7 @@ #include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_process.h" #include "core/memory.h" +#include "video_core/invalidation_accumulator.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" @@ -26,7 +27,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits != big_page_bits ? page_bits : 0}, kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( - 1, std::memory_order_acq_rel)} { + 1, std::memory_order_acq_rel)}, + accumulator{std::make_unique()} { address_space_size = 1ULL << address_space_bits; page_size = 1ULL << page_bits; page_mask = page_size - 1ULL; @@ -185,15 +187,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { if (size == 0) { return; } - const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); + GetSubmappedRangeImpl(gpu_addr, size, page_stash); - for (const auto& [map_addr, map_size] : submapped_ranges) { - // Flush and invalidate through the GPU interface, to be asynchronous if possible. - const std::optional cpu_addr = GpuToCpuAddress(map_addr); - ASSERT(cpu_addr); - - rasterizer->UnmapMemory(*cpu_addr, map_size); + for (const auto& [map_addr, map_size] : page_stash) { + rasterizer->UnmapMemory(map_addr, map_size); } + page_stash.clear(); BigPageTableOp(gpu_addr, 0, size, PTEKind::INVALID); PageTableOp(gpu_addr, 0, size, PTEKind::INVALID); @@ -454,6 +453,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf WriteBlockImpl(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); } +void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, + std::size_t size) { + WriteBlockImpl(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); + accumulator->Add(gpu_dest_addr, size); +} + void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, VideoCommon::CacheType which) const { auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, @@ -663,7 +668,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons std::vector> MemoryManager::GetSubmappedRange( GPUVAddr gpu_addr, std::size_t size) const { std::vector> result{}; - std::optional> last_segment{}; + GetSubmappedRangeImpl(gpu_addr, size, result); + return result; +} + +template +void MemoryManager::GetSubmappedRangeImpl( + GPUVAddr gpu_addr, std::size_t size, + std::vector, std::size_t>>& + result) const { + std::optional, std::size_t>> + last_segment{}; std::optional old_page_addr{}; const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, @@ -685,8 +700,12 @@ std::vector> MemoryManager::GetSubmappedRange( } old_page_addr = {cpu_addr_base + copy_amount}; if (!last_segment) { - const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; - last_segment = {new_base_addr, copy_amount}; + if constexpr (is_gpu_address) { + const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; + last_segment = {new_base_addr, copy_amount}; + } else { + last_segment = {cpu_addr_base, copy_amount}; + } } else { last_segment->second += copy_amount; } @@ -703,8 +722,12 @@ std::vector> MemoryManager::GetSubmappedRange( } old_page_addr = {cpu_addr_base + copy_amount}; if (!last_segment) { - const GPUVAddr new_base_addr = (page_index << page_bits) + offset; - last_segment = {new_base_addr, copy_amount}; + if constexpr (is_gpu_address) { + const GPUVAddr new_base_addr = (page_index << page_bits) + offset; + last_segment = {new_base_addr, copy_amount}; + } else { + last_segment = {cpu_addr_base, copy_amount}; + } } else { last_segment->second += copy_amount; } @@ -715,7 +738,18 @@ std::vector> MemoryManager::GetSubmappedRange( }; MemoryOperation(gpu_addr, size, extend_size_big, split, do_short_pages); split(0, 0, 0); - return result; +} + +void MemoryManager::FlushCaching() { + if (!accumulator->AnyAccumulated()) { + return; + } + accumulator->Callback([this](GPUVAddr addr, size_t size) { + GetSubmappedRangeImpl(addr, size, page_stash); + }); + rasterizer->InnerInvalidation(page_stash); + page_stash.clear(); + accumulator->Clear(); } } // namespace Tegra -- cgit v1.2.3 From af5ecb0b15d4449f58434e70eed835cf71fc5527 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 5 Jan 2023 06:06:33 -0500 Subject: MemoryManager: use fastmem directly. --- src/video_core/memory_manager.cpp | 40 ++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) (limited to 'src/video_core/memory_manager.cpp') diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 83924475b..0a6390054 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -6,6 +6,7 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" +#include "common/settings.h" #include "core/core.h" #include "core/device_memory.h" #include "core/hle/kernel/k_page_table.h" @@ -45,6 +46,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_table_cpu.resize(big_page_table_size); big_page_continous.resize(big_page_table_size / continous_bits, 0); entries.resize(page_table_size / 32, 0); + if (!Settings::IsGPULevelExtreme()) { + fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); + } else { + fastmem_arena = nullptr; + } } MemoryManager::~MemoryManager() = default; @@ -354,7 +360,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si } } -template +template void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, [[maybe_unused]] VideoCommon::CacheType which) const { auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, @@ -368,8 +374,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: if constexpr (is_safe) { rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } - u8* physical = memory.GetPointer(cpu_addr_base); - std::memcpy(dest_buffer, physical, copy_amount); + if constexpr (use_fastmem) { + std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); + } else { + u8* physical = memory.GetPointer(cpu_addr_base); + std::memcpy(dest_buffer, physical, copy_amount); + } dest_buffer = static_cast(dest_buffer) + copy_amount; }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -378,11 +388,15 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: if constexpr (is_safe) { rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } - if (!IsBigPageContinous(page_index)) [[unlikely]] { - memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); + if constexpr (use_fastmem) { + std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); } else { - u8* physical = memory.GetPointer(cpu_addr_base); - std::memcpy(dest_buffer, physical, copy_amount); + if (!IsBigPageContinous(page_index)) [[unlikely]] { + memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); + } else { + u8* physical = memory.GetPointer(cpu_addr_base); + std::memcpy(dest_buffer, physical, copy_amount); + } } dest_buffer = static_cast(dest_buffer) + copy_amount; }; @@ -396,12 +410,20 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, VideoCommon::CacheType which) const { - ReadBlockImpl(gpu_src_addr, dest_buffer, size, which); + if (fastmem_arena) [[likely]] { + ReadBlockImpl(gpu_src_addr, dest_buffer, size, which); + return; + } + ReadBlockImpl(gpu_src_addr, dest_buffer, size, which); } void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, const std::size_t size) const { - ReadBlockImpl(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); + if (fastmem_arena) [[likely]] { + ReadBlockImpl(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); + return; + } + ReadBlockImpl(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); } template -- cgit v1.2.3 From b56ad93bbc9ac38820c1e1cb4b03256dd50aa17a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 5 Jan 2023 06:43:54 -0500 Subject: BufferBase: Don't ignore GPU pages. --- src/video_core/memory_manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/memory_manager.cpp') diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 0a6390054..3bcae3503 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -46,7 +46,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_table_cpu.resize(big_page_table_size); big_page_continous.resize(big_page_table_size / continous_bits, 0); entries.resize(page_table_size / 32, 0); - if (!Settings::IsGPULevelExtreme()) { + if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) { fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); } else { fastmem_arena = nullptr; -- cgit v1.2.3