From 47d0d292d5cc5f0404e126023279db7decd532ac Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 28 Jun 2023 06:28:13 +0200 Subject: MemoryTracking: Initial setup of atomic writes. --- src/core/gpu_dirty_memory_manager.h | 112 ++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 src/core/gpu_dirty_memory_manager.h (limited to 'src/core/gpu_dirty_memory_manager.h') diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h new file mode 100644 index 000000000..9c3d41d11 --- /dev/null +++ b/src/core/gpu_dirty_memory_manager.h @@ -0,0 +1,112 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "core/memory.h" + +namespace Core { + +class GPUDirtyMemoryManager { +public: + GPUDirtyMemoryManager() : current{default_transform} {} + + ~GPUDirtyMemoryManager() = default; + + void Collect(VAddr address, size_t size) { + TransformAddress t = BuildTransform(address, size); + TransformAddress tmp, original; + do { + tmp = current.load(std::memory_order_acquire); + original = tmp; + if (tmp.address != t.address) { + if (IsValid(tmp.address)) { + std::scoped_lock lk(guard); + back_buffer.emplace_back(tmp); + current.exchange(t, std::memory_order_relaxed); + return; + } + tmp.address = t.address; + tmp.mask = 0; + } + if ((tmp.mask | t.mask) == tmp.mask) { + return; + } + tmp.mask |= t.mask; + } while (!current.compare_exchange_weak(original, tmp, std::memory_order_release, + std::memory_order_relaxed)); + } + + void Gather(std::function& callback) { + { + std::scoped_lock lk(guard); + TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed); + front_buffer.swap(back_buffer); + if (IsValid(t.address)) { + front_buffer.emplace_back(t); + } + } + for (auto& transform : front_buffer) { + size_t offset = 0; + u64 mask = transform.mask; + while (mask != 0) { + const size_t empty_bits = std::countr_zero(mask); + offset += empty_bits << align_bits; + 
mask = mask >> empty_bits; + + const size_t continuous_bits = std::countr_one(mask); + callback((transform.address << Memory::YUZU_PAGEBITS) + offset, + continuous_bits << align_bits); + mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; + offset += continuous_bits << align_bits; + } + } + front_buffer.clear(); + } + +private: + struct alignas(16) TransformAddress { + VAddr address; + u64 mask; + }; + + constexpr static size_t align_bits = 6U; + constexpr static size_t align_size = 1U << align_bits; + constexpr static size_t align_mask = align_size - 1; + constexpr static TransformAddress default_transform = {.address = ~0ULL, .mask = 0ULL}; + + bool IsValid(VAddr address) { + return address < (1ULL << 39); + } + + template <typename T> + T CreateMask(size_t top_bit, size_t minor_bit) { + T mask = ~T(0); + mask <<= (sizeof(T) * 8 - top_bit); + mask >>= (sizeof(T) * 8 - top_bit); + mask >>= minor_bit; + mask <<= minor_bit; + return mask; + } + + TransformAddress BuildTransform(VAddr address, size_t size) { + const size_t minor_address = address & Memory::YUZU_PAGEMASK; + const size_t minor_bit = minor_address >> align_bits; + const size_t top_bit = (minor_address + size + align_mask) >> align_bits; + TransformAddress result{}; + result.address = address >> Memory::YUZU_PAGEBITS; + result.mask = CreateMask<u64>(top_bit, minor_bit); + return result; + } + + std::atomic<TransformAddress> current{}; + std::mutex guard; + std::vector<TransformAddress> back_buffer; + std::vector<TransformAddress> front_buffer; +}; + +} // namespace Core -- cgit v1.2.3 From da440da9f54cc860f3c69da685a415d5ec9d7b64 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 28 Jun 2023 19:32:50 +0200 Subject: Memory Tracking: Optimize tracking to only use atomic writes when contested with the host GPU --- src/core/gpu_dirty_memory_manager.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/core/gpu_dirty_memory_manager.h') diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h 
index 9c3d41d11..789b7530f 100644 --- a/src/core/gpu_dirty_memory_manager.h +++ b/src/core/gpu_dirty_memory_manager.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + #pragma once #include @@ -59,8 +62,7 @@ public: mask = mask >> empty_bits; const size_t continuous_bits = std::countr_one(mask); - callback((transform.address << Memory::YUZU_PAGEBITS) + offset, - continuous_bits << align_bits); + callback((transform.address << page_bits) + offset, continuous_bits << align_bits); mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; offset += continuous_bits << align_bits; } @@ -74,6 +76,10 @@ private: u64 mask; }; + constexpr static size_t page_bits = Memory::YUZU_PAGEBITS; + constexpr static size_t page_size = 1ULL << page_bits; + constexpr static size_t page_mask = page_size - 1; + constexpr static size_t align_bits = 6U; constexpr static size_t align_size = 1U << align_bits; constexpr static size_t align_mask = align_size - 1; @@ -94,11 +100,11 @@ private: } TransformAddress BuildTransform(VAddr address, size_t size) { - const size_t minor_address = address & Memory::YUZU_PAGEMASK; + const size_t minor_address = address & page_mask; const size_t minor_bit = minor_address >> align_bits; const size_t top_bit = (minor_address + size + align_mask) >> align_bits; TransformAddress result{}; - result.address = address >> Memory::YUZU_PAGEBITS; + result.address = address >> page_bits; result.mask = CreateMask<u64>(top_bit, minor_bit); return result; } -- cgit v1.2.3 From 0e6b559c98e3dee54c3c9eaef2d3e59f3871882d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 29 Jun 2023 12:24:56 +0200 Subject: Memory Tracker: Use 64 bit atomics instead of 128 bits --- src/core/gpu_dirty_memory_manager.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'src/core/gpu_dirty_memory_manager.h') diff --git a/src/core/gpu_dirty_memory_manager.h 
b/src/core/gpu_dirty_memory_manager.h index 789b7530f..9687531e8 100644 --- a/src/core/gpu_dirty_memory_manager.h +++ b/src/core/gpu_dirty_memory_manager.h @@ -16,7 +16,10 @@ namespace Core { class GPUDirtyMemoryManager { public: - GPUDirtyMemoryManager() : current{default_transform} {} + GPUDirtyMemoryManager() : current{default_transform} { + back_buffer.reserve(256); + front_buffer.reserve(256); + } ~GPUDirtyMemoryManager() = default; @@ -62,7 +65,8 @@ public: mask = mask >> empty_bits; const size_t continuous_bits = std::countr_one(mask); - callback((transform.address << page_bits) + offset, continuous_bits << align_bits); + callback((static_cast<VAddr>(transform.address) << page_bits) + offset, + continuous_bits << align_bits); mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; offset += continuous_bits << align_bits; } @@ -71,19 +75,19 @@ public: } private: - struct alignas(16) TransformAddress { - VAddr address; - u64 mask; + struct alignas(8) TransformAddress { + u32 address; + u32 mask; }; - constexpr static size_t page_bits = Memory::YUZU_PAGEBITS; + constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1; constexpr static size_t page_size = 1ULL << page_bits; constexpr static size_t page_mask = page_size - 1; constexpr static size_t align_bits = 6U; constexpr static size_t align_size = 1U << align_bits; constexpr static size_t align_mask = align_size - 1; - constexpr static TransformAddress default_transform = {.address = ~0ULL, .mask = 0ULL}; + constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; bool IsValid(VAddr address) { return address < (1ULL << 39); @@ -104,8 +108,8 @@ private: const size_t minor_bit = minor_address >> align_bits; const size_t top_bit = (minor_address + size + align_mask) >> align_bits; TransformAddress result{}; - result.address = address >> page_bits; - result.mask = CreateMask<u64>(top_bit, minor_bit); + result.address = static_cast<u32>(address >> page_bits); + result.mask = 
CreateMask<u32>(top_bit, minor_bit); return result; } -- cgit v1.2.3