Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--  src/video_core/buffer_cache/buffer_block.h   |  27
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h   | 570
-rw-r--r--  src/video_core/buffer_cache/map_interval.cpp |  33
-rw-r--r--  src/video_core/buffer_cache/map_interval.h   | 113
4 files changed, 444 insertions, 299 deletions
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index e35ee0b67..e64170e66 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,48 +15,47 @@ namespace VideoCommon {
 
 class BufferBlock {
 public:
-    bool Overlaps(const VAddr start, const VAddr end) const {
+    bool Overlaps(VAddr start, VAddr end) const {
         return (cpu_addr < end) && (cpu_addr_end > start);
     }
 
-    bool IsInside(const VAddr other_start, const VAddr other_end) const {
+    bool IsInside(VAddr other_start, VAddr other_end) const {
         return cpu_addr <= other_start && other_end <= cpu_addr_end;
     }
 
-    std::size_t GetOffset(const VAddr in_addr) {
+    std::size_t Offset(VAddr in_addr) const {
         return static_cast<std::size_t>(in_addr - cpu_addr);
     }
 
-    VAddr GetCpuAddr() const {
+    VAddr CpuAddr() const {
         return cpu_addr;
     }
 
-    VAddr GetCpuAddrEnd() const {
+    VAddr CpuAddrEnd() const {
         return cpu_addr_end;
     }
 
-    void SetCpuAddr(const VAddr new_addr) {
+    void SetCpuAddr(VAddr new_addr) {
         cpu_addr = new_addr;
         cpu_addr_end = new_addr + size;
     }
 
-    std::size_t GetSize() const {
+    std::size_t Size() const {
         return size;
     }
 
-    void SetEpoch(u64 new_epoch) {
-        epoch = new_epoch;
+    u64 Epoch() const {
+        return epoch;
     }
 
-    u64 GetEpoch() {
-        return epoch;
+    void SetEpoch(u64 new_epoch) {
+        epoch = new_epoch;
     }
 
 protected:
-    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
-        SetCpuAddr(cpu_addr);
+    explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
+        SetCpuAddr(cpu_addr_);
     }
-    ~BufferBlock() = default;
 
 private:
     VAddr cpu_addr{};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b57c0d4d4..e7edd733f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -4,7 +4,7 @@
 
 #pragma once
 
-#include <array>
+#include <list>
 #include <memory>
 #include <mutex>
 #include <unordered_map>
@@ -12,14 +12,17 @@
 #include <utility>
 #include <vector>
 
-#include <boost/icl/interval_map.hpp>
+#include <boost/container/small_vector.hpp>
 #include <boost/icl/interval_set.hpp>
-#include <boost/range/iterator_range.hpp>
+#include <boost/intrusive/set.hpp>
 
 #include "common/alignment.h"
+#include "common/assert.h"
 #include "common/common_types.h"
+#include "common/logging/log.h"
 #include "core/core.h"
 #include "core/memory.h"
+#include "core/settings.h"
 #include "video_core/buffer_cache/buffer_block.h"
 #include "video_core/buffer_cache/map_interval.h"
 #include "video_core/memory_manager.h"
@@ -27,105 +30,122 @@ namespace VideoCommon {
 
-using MapInterval = std::shared_ptr<MapIntervalBase>;
-
-template <typename TBuffer, typename TBufferType, typename StreamBuffer>
+template <typename Buffer, typename BufferType, typename StreamBuffer>
 class BufferCache {
+    using IntervalSet = boost::icl::interval_set<VAddr>;
+    using IntervalType = typename IntervalSet::interval_type;
+    using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
+
+    static constexpr u64 WRITE_PAGE_BIT = 11;
+    static constexpr u64 BLOCK_PAGE_BITS = 21;
+    static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
+
 public:
-    using BufferInfo = std::pair<const TBufferType*, u64>;
+    struct BufferInfo {
+        BufferType handle;
+        u64 offset;
+        u64 address;
+    };
 
     BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                             bool is_written = false, bool use_fast_cbuf = false) {
         std::lock_guard lock{mutex};
 
-        const std::optional<VAddr> cpu_addr_opt =
-            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
-
-        if (!cpu_addr_opt) {
-            return {GetEmptyBuffer(size), 0};
+        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+        if (!cpu_addr) {
+            return GetEmptyBuffer(size);
         }
 
-        VAddr cpu_addr = *cpu_addr_opt;
-
         // Cache management is a big overhead, so only cache entries with a given size.
         // TODO: Figure out which size is the best for given games.
         constexpr std::size_t max_stream_size = 0x800;
         if (use_fast_cbuf || size < max_stream_size) {
-            if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
-                auto& memory_manager = system.GPU().MemoryManager();
+            if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) {
+                const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size);
                 if (use_fast_cbuf) {
-                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
-                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-                        return ConstBufferUpload(host_ptr, size);
+                    u8* dest;
+                    if (is_granular) {
+                        dest = gpu_memory.GetPointer(gpu_addr);
                     } else {
                         staging_buffer.resize(size);
-                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
-                        return ConstBufferUpload(staging_buffer.data(), size);
+                        dest = staging_buffer.data();
+                        gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
                     }
+                    return ConstBufferUpload(dest, size);
+                }
+                if (is_granular) {
+                    u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
+                    return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
+                        std::memcpy(dest, host_ptr, size);
+                    });
                 } else {
-                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
-                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-                        return StreamBufferUpload(host_ptr, size, alignment);
-                    } else {
-                        staging_buffer.resize(size);
-                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
-                        return StreamBufferUpload(staging_buffer.data(), size, alignment);
-                    }
+                    return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) {
+                        gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
+                    });
                 }
             }
         }
 
-        auto block = GetBlock(cpu_addr, size);
-        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
+        Buffer* const block = GetBlock(*cpu_addr, size);
+        MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size);
+        if (!map) {
+            return GetEmptyBuffer(size);
+        }
         if (is_written) {
             map->MarkAsModified(true, GetModifiedTicks());
-            if (!map->IsWritten()) {
-                map->MarkAsWritten(true);
-                MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+            if (Settings::IsGPULevelHigh() &&
+                Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
+                MarkForAsyncFlush(map);
             }
-        } else {
-            if (map->IsWritten()) {
-                WriteBarrier();
+            if (!map->is_written) {
+                map->is_written = true;
+                MarkRegionAsWritten(map->start, map->end - 1);
             }
         }
 
-        const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
-
-        return {ToHandle(block), offset};
+        return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()};
     }
 
     /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
     BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
                                 std::size_t alignment = 4) {
         std::lock_guard lock{mutex};
-        return StreamBufferUpload(raw_pointer, size, alignment);
+        return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
+            std::memcpy(dest, raw_pointer, size);
+        });
     }
 
-    void Map(std::size_t max_size) {
+    /// Prepares the buffer cache for data uploading
+    /// @param max_size Maximum number of bytes that will be uploaded
+    /// @return True when a stream buffer invalidation was required, false otherwise
+    bool Map(std::size_t max_size) {
         std::lock_guard lock{mutex};
 
+        bool invalidated;
         std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
         buffer_offset = buffer_offset_base;
+
+        return invalidated;
     }
 
-    /// Finishes the upload stream, returns true on bindings invalidation.
-    bool Unmap() {
+    /// Finishes the upload stream
+    void Unmap() {
         std::lock_guard lock{mutex};
-
         stream_buffer->Unmap(buffer_offset - buffer_offset_base);
-        return std::exchange(invalidated, false);
     }
 
+    /// Function called at the end of each frame, intended for deferred operations
     void TickFrame() {
         ++epoch;
+
         while (!pending_destruction.empty()) {
             // Delay at least 4 frames before destruction.
             // This is due to triple buffering happening on some drivers.
             static constexpr u64 epochs_to_destroy = 5;
-            if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
+            if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
                 break;
             }
-            pending_destruction.pop_front();
+            pending_destruction.pop();
         }
     }
 
@@ -133,117 +153,193 @@ public:
     void FlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
-        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
-        std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) {
-            return a->GetModificationTick() < b->GetModificationTick();
-        });
-        for (auto& object : objects) {
-            if (object->IsModified() && object->IsRegistered()) {
+        VectorMapInterval objects = GetMapsInRange(addr, size);
+        std::sort(objects.begin(), objects.end(),
+                  [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
+        for (MapInterval* object : objects) {
+            if (object->is_modified && object->is_registered) {
+                mutex.unlock();
                 FlushMap(object);
+                mutex.lock();
             }
         }
     }
 
+    bool MustFlushRegion(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        const VectorMapInterval objects = GetMapsInRange(addr, size);
+        return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
+            return map->is_modified && map->is_registered;
+        });
+    }
+
     /// Mark the specified region as being invalidated
     void InvalidateRegion(VAddr addr, u64 size) {
         std::lock_guard lock{mutex};
 
-        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
-        for (auto& object : objects) {
-            if (object->IsRegistered()) {
+        for (auto& object : GetMapsInRange(addr, size)) {
+            if (object->is_registered) {
                 Unregister(object);
             }
         }
     }
 
-    virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0;
+    void OnCPUWrite(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
 
-protected:
-    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
-                         std::unique_ptr<StreamBuffer> stream_buffer)
-        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
-          stream_buffer_handle{this->stream_buffer->GetHandle()} {}
+        for (MapInterval* object : GetMapsInRange(addr, size)) {
+            if (object->is_memory_marked && object->is_registered) {
+                UnmarkMemory(object);
+                object->is_sync_pending = true;
+                marked_for_unregister.emplace_back(object);
+            }
+        }
+    }
 
-    ~BufferCache() = default;
+    void SyncGuestHost() {
+        std::lock_guard lock{mutex};
+
+        for (auto& object : marked_for_unregister) {
+            if (object->is_registered) {
+                object->is_sync_pending = false;
+                Unregister(object);
+            }
+        }
+        marked_for_unregister.clear();
+    }
+
+    void CommitAsyncFlushes() {
+        if (uncommitted_flushes) {
+            auto commit_list = std::make_shared<std::list<MapInterval*>>();
+            for (MapInterval* map : *uncommitted_flushes) {
+                if (map->is_registered && map->is_modified) {
+                    // TODO(Blinkhawk): Implement backend asynchronous flushing
+                    // AsyncFlushMap(map)
+                    commit_list->push_back(map);
+                }
+            }
+            if (!commit_list->empty()) {
+                committed_flushes.push_back(commit_list);
+            } else {
+                committed_flushes.emplace_back();
+            }
+        } else {
+            committed_flushes.emplace_back();
+        }
+        uncommitted_flushes.reset();
+    }
 
-    virtual const TBufferType* ToHandle(const TBuffer& storage) = 0;
+    bool ShouldWaitAsyncFlushes() const {
+        return !committed_flushes.empty() && committed_flushes.front() != nullptr;
+    }
 
-    virtual void WriteBarrier() = 0;
+    bool HasUncommittedFlushes() const {
+        return uncommitted_flushes != nullptr;
+    }
+
+    void PopAsyncFlushes() {
+        if (committed_flushes.empty()) {
+            return;
+        }
+        auto& flush_list = committed_flushes.front();
+        if (!flush_list) {
+            committed_flushes.pop_front();
+            return;
+        }
+        for (MapInterval* map : *flush_list) {
+            if (map->is_registered) {
+                // TODO(Blinkhawk): Replace this for reading the asynchronous flush
+                FlushMap(map);
+            }
+        }
+        committed_flushes.pop_front();
+    }
 
-    virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
+    virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
 
-    virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
-                                 const u8* data) = 0;
+protected:
+    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+                         Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+                         std::unique_ptr<StreamBuffer> stream_buffer_)
+        : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
+          stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
 
-    virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
-                                   u8* data) = 0;
+    ~BufferCache() = default;
 
-    virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
-                           std::size_t dst_offset, std::size_t size) = 0;
+    virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
 
     virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
         return {};
     }
 
     /// Register an object into the cache
-    void Register(const MapInterval& new_map, bool inherit_written = false) {
-        const VAddr cpu_addr = new_map->GetStart();
+    MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
+        const VAddr cpu_addr = new_map.start;
         if (!cpu_addr) {
             LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
-                         new_map->GetGpuAddress());
-            return;
+                         new_map.gpu_addr);
+            return nullptr;
         }
-        const std::size_t size = new_map->GetEnd() - new_map->GetStart();
-        new_map->MarkAsRegistered(true);
-        const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
-        mapped_addresses.insert({interval, new_map});
+        const std::size_t size = new_map.end - new_map.start;
+        new_map.is_registered = true;
         rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+        new_map.is_memory_marked = true;
         if (inherit_written) {
-            MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
-            new_map->MarkAsWritten(true);
+            MarkRegionAsWritten(new_map.start, new_map.end - 1);
+            new_map.is_written = true;
         }
+        MapInterval* const storage = mapped_addresses_allocator.Allocate();
+        *storage = new_map;
+        mapped_addresses.insert(*storage);
+        return storage;
     }
 
-    /// Unregisters an object from the cache
-    void Unregister(MapInterval& map) {
-        const std::size_t size = map->GetEnd() - map->GetStart();
-        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
-        map->MarkAsRegistered(false);
-        if (map->IsWritten()) {
-            UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+    void UnmarkMemory(MapInterval* map) {
+        if (!map->is_memory_marked) {
+            return;
         }
-        const IntervalType delete_interval{map->GetStart(), map->GetEnd()};
-        mapped_addresses.erase(delete_interval);
+        const std::size_t size = map->end - map->start;
+        rasterizer.UpdatePagesCachedCount(map->start, size, -1);
+        map->is_memory_marked = false;
    }
 
-private:
-    MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
-        return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
+    /// Unregisters an object from the cache
+    void Unregister(MapInterval* map) {
+        UnmarkMemory(map);
+        map->is_registered = false;
+        if (map->is_sync_pending) {
+            map->is_sync_pending = false;
+            marked_for_unregister.remove(map);
+        }
+        if (map->is_written) {
+            UnmarkRegionAsWritten(map->start, map->end - 1);
+        }
+        const auto it = mapped_addresses.find(*map);
+        ASSERT(it != mapped_addresses.end());
+        mapped_addresses.erase(it);
+        mapped_addresses_allocator.Release(map);
    }
 
-    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
-                           const std::size_t size) {
-
-        std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
+private:
+    MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
+        const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
         if (overlaps.empty()) {
-            auto& memory_manager = system.GPU().MemoryManager();
             const VAddr cpu_addr_end = cpu_addr + size;
-            MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
-            if (memory_manager.IsGranularRange(gpu_addr, size)) {
-                u8* host_ptr = memory_manager.GetPointer(gpu_addr);
-                UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
+            if (gpu_memory.IsGranularRange(gpu_addr, size)) {
+                u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
+                block->Upload(block->Offset(cpu_addr), size, host_ptr);
             } else {
                 staging_buffer.resize(size);
-                memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
-                UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
+                gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
             }
-            Register(new_map);
-            return new_map;
+            return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
         }
 
         const VAddr cpu_addr_end = cpu_addr + size;
         if (overlaps.size() == 1) {
-            MapInterval& current_map = overlaps[0];
+            MapInterval* const current_map = overlaps[0];
             if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
                 return current_map;
             }
@@ -253,57 +349,70 @@ private:
         bool write_inheritance = false;
         bool modified_inheritance = false;
         // Calculate new buffer parameters
-        for (auto& overlap : overlaps) {
-            new_start = std::min(overlap->GetStart(), new_start);
-            new_end = std::max(overlap->GetEnd(), new_end);
-            write_inheritance |= overlap->IsWritten();
-            modified_inheritance |= overlap->IsModified();
+        for (MapInterval* overlap : overlaps) {
+            new_start = std::min(overlap->start, new_start);
+            new_end = std::max(overlap->end, new_end);
+            write_inheritance |= overlap->is_written;
+            modified_inheritance |= overlap->is_modified;
         }
         GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
         for (auto& overlap : overlaps) {
             Unregister(overlap);
         }
         UpdateBlock(block, new_start, new_end, overlaps);
-        MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
+
+        const MapInterval new_map{new_start, new_end, new_gpu_addr};
+        MapInterval* const map = Register(new_map, write_inheritance);
+        if (!map) {
+            return nullptr;
+        }
         if (modified_inheritance) {
-            new_map->MarkAsModified(true, GetModifiedTicks());
+            map->MarkAsModified(true, GetModifiedTicks());
+            if (Settings::IsGPULevelHigh() &&
+                Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
+                MarkForAsyncFlush(map);
+            }
         }
-        Register(new_map, write_inheritance);
-        return new_map;
+        return map;
     }
 
-    void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
-                     std::vector<MapInterval>& overlaps) {
+    void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) {
         const IntervalType base_interval{start, end};
         IntervalSet interval_set{};
         interval_set.add(base_interval);
         for (auto& overlap : overlaps) {
-            const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()};
+            const IntervalType subtract{overlap->start, overlap->end};
             interval_set.subtract(subtract);
         }
         for (auto& interval : interval_set) {
-            std::size_t size = interval.upper() - interval.lower();
-            if (size > 0) {
-                staging_buffer.resize(size);
-                system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
-                UploadBlockData(block, block->GetOffset(interval.lower()), size,
-                                staging_buffer.data());
+            const std::size_t size = interval.upper() - interval.lower();
+            if (size == 0) {
+                continue;
             }
+            staging_buffer.resize(size);
+            cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+            block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
         }
     }
 
-    std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
+    VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) {
+        VectorMapInterval result;
         if (size == 0) {
-            return {};
+            return result;
         }
 
-        std::vector<MapInterval> objects{};
-        const IntervalType interval{addr, addr + size};
-        for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) {
-            objects.push_back(pair.second);
+        const VAddr addr_end = addr + size;
+        auto it = mapped_addresses.lower_bound(addr);
+        if (it != mapped_addresses.begin()) {
+            --it;
         }
-
-        return objects;
+        while (it != mapped_addresses.end() && it->start < addr_end) {
+            if (it->Overlaps(addr, addr_end)) {
+                result.push_back(&*it);
+            }
+            ++it;
+        }
+        return result;
     }
 
     /// Returns a ticks counter used for tracking when cached objects were last modified
@@ -311,24 +420,28 @@ private:
         return ++modified_ticks;
     }
 
-    void FlushMap(MapInterval map) {
-        std::size_t size = map->GetEnd() - map->GetStart();
-        TBuffer block = blocks[map->GetStart() >> block_page_bits];
+    void FlushMap(MapInterval* map) {
+        const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
+        ASSERT_OR_EXECUTE(it != blocks.end(), return;);
+
+        std::shared_ptr<Buffer> block = it->second;
+
+        const std::size_t size = map->end - map->start;
         staging_buffer.resize(size);
-        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
-        system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
+        block->Download(block->Offset(map->start), size, staging_buffer.data());
+        cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size);
         map->MarkAsModified(false, 0);
     }
 
-    BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
-                                  std::size_t alignment) {
+    template <typename Callable>
+    BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
        AlignBuffer(alignment);
        const std::size_t uploaded_offset = buffer_offset;
-        std::memcpy(buffer_ptr, raw_pointer, size);
+        callable(buffer_ptr);
        buffer_ptr += size;
        buffer_offset += size;
-        return {&stream_buffer_handle, uploaded_offset};
+        return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
     }
 
     void AlignBuffer(std::size_t alignment) {
@@ -338,151 +451,148 @@ private:
         buffer_offset = offset_aligned;
     }
 
-    TBuffer EnlargeBlock(TBuffer buffer) {
-        const std::size_t old_size = buffer->GetSize();
-        const std::size_t new_size = old_size + block_page_size;
-        const VAddr cpu_addr = buffer->GetCpuAddr();
-        TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
-        CopyBlock(buffer, new_buffer, 0, 0, old_size);
-        buffer->SetEpoch(epoch);
-        pending_destruction.push_back(buffer);
+    std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
+        const std::size_t old_size = buffer->Size();
+        const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
+        const VAddr cpu_addr = buffer->CpuAddr();
+        std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
+        new_buffer->CopyFrom(*buffer, 0, 0, old_size);
+        QueueDestruction(std::move(buffer));
+
         const VAddr cpu_addr_end = cpu_addr + new_size - 1;
-        u64 page_start = cpu_addr >> block_page_bits;
-        const u64 page_end = cpu_addr_end >> block_page_bits;
-        while (page_start <= page_end) {
-            blocks[page_start] = new_buffer;
-            ++page_start;
+        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+        for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
+            blocks.insert_or_assign(page_start, new_buffer);
         }
+
         return new_buffer;
     }
 
-    TBuffer MergeBlocks(TBuffer first, TBuffer second) {
-        const std::size_t size_1 = first->GetSize();
-        const std::size_t size_2 = second->GetSize();
-        const VAddr first_addr = first->GetCpuAddr();
-        const VAddr second_addr = second->GetCpuAddr();
+    std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
+                                        std::shared_ptr<Buffer> second) {
+        const std::size_t size_1 = first->Size();
+        const std::size_t size_2 = second->Size();
+        const VAddr first_addr = first->CpuAddr();
+        const VAddr second_addr = second->CpuAddr();
         const VAddr new_addr = std::min(first_addr, second_addr);
         const std::size_t new_size = size_1 + size_2;
-        TBuffer new_buffer = CreateBlock(new_addr, new_size);
-        CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
-        CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
-        first->SetEpoch(epoch);
-        second->SetEpoch(epoch);
-        pending_destruction.push_back(first);
-        pending_destruction.push_back(second);
+
+        std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
+        new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
+        new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
+        QueueDestruction(std::move(first));
+        QueueDestruction(std::move(second));
+
         const VAddr cpu_addr_end = new_addr + new_size - 1;
-        u64 page_start = new_addr >> block_page_bits;
-        const u64 page_end = cpu_addr_end >> block_page_bits;
-        while (page_start <= page_end) {
-            blocks[page_start] = new_buffer;
-            ++page_start;
+        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+        for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
+            blocks.insert_or_assign(page_start, new_buffer);
         }
         return new_buffer;
     }
 
-    TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
-        TBuffer found{};
+    Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
+        std::shared_ptr<Buffer> found;
+
         const VAddr cpu_addr_end = cpu_addr + size - 1;
-        u64 page_start = cpu_addr >> block_page_bits;
-        const u64 page_end = cpu_addr_end >> block_page_bits;
-        while (page_start <= page_end) {
+        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+        for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
             auto it = blocks.find(page_start);
             if (it == blocks.end()) {
                 if (found) {
                     found = EnlargeBlock(found);
-                } else {
-                    const VAddr start_addr = (page_start << block_page_bits);
-                    found = CreateBlock(start_addr, block_page_size);
-                    blocks[page_start] = found;
-                }
-            } else {
-                if (found) {
-                    if (found == it->second) {
-                        ++page_start;
-                        continue;
-                    }
-                    found = MergeBlocks(found, it->second);
-                } else {
-                    found = it->second;
+                    continue;
                 }
+                const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
+                found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
+                blocks.insert_or_assign(page_start, found);
+                continue;
+            }
+            if (!found) {
+                found = it->second;
+                continue;
+            }
+            if (found != it->second) {
+                found = MergeBlocks(std::move(found), it->second);
             }
-            ++page_start;
         }
-        return found;
+        return found.get();
     }
 
-    void MarkRegionAsWritten(const VAddr start, const VAddr end) {
-        u64 page_start = start >> write_page_bit;
-        const u64 page_end = end >> write_page_bit;
-        while (page_start <= page_end) {
-            auto it = written_pages.find(page_start);
-            if (it != written_pages.end()) {
-                it->second = it->second + 1;
-            } else {
-                written_pages[page_start] = 1;
+    void MarkRegionAsWritten(VAddr start, VAddr end) {
+        const u64 page_end = end >> WRITE_PAGE_BIT;
+        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
+            if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) {
+                ++it->second;
             }
-            page_start++;
         }
     }
 
-    void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
-        u64 page_start = start >> write_page_bit;
-        const u64 page_end = end >> write_page_bit;
-        while (page_start <= page_end) {
+    void UnmarkRegionAsWritten(VAddr start, VAddr end) {
+        const u64 page_end = end >> WRITE_PAGE_BIT;
+        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
             auto it = written_pages.find(page_start);
             if (it != written_pages.end()) {
                 if (it->second > 1) {
-                    it->second = it->second - 1;
+                    --it->second;
                 } else {
                     written_pages.erase(it);
                 }
             }
-            page_start++;
         }
     }
 
-    bool IsRegionWritten(const VAddr start, const VAddr end) const {
-        u64 page_start = start >> write_page_bit;
-        const u64 page_end = end >> write_page_bit;
-        while (page_start <= page_end) {
+    bool IsRegionWritten(VAddr start, VAddr end) const {
+        const u64 page_end = end >> WRITE_PAGE_BIT;
+        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
             if (written_pages.count(page_start) > 0) {
                 return true;
             }
-            page_start++;
         }
         return false;
     }
 
+    void QueueDestruction(std::shared_ptr<Buffer> buffer) {
+        buffer->SetEpoch(epoch);
+        pending_destruction.push(std::move(buffer));
+    }
+
+    void MarkForAsyncFlush(MapInterval* map) {
+        if (!uncommitted_flushes) {
+            uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
+        }
+        uncommitted_flushes->insert(map);
+    }
+
     VideoCore::RasterizerInterface& rasterizer;
-    Core::System& system;
+    Tegra::MemoryManager& gpu_memory;
+    Core::Memory::Memory& cpu_memory;
 
     std::unique_ptr<StreamBuffer> stream_buffer;
-    TBufferType stream_buffer_handle{};
-
-    bool invalidated = false;
+    BufferType stream_buffer_handle;
 
     u8* buffer_ptr = nullptr;
     u64 buffer_offset = 0;
     u64 buffer_offset_base = 0;
 
-    using IntervalSet = boost::icl::interval_set<VAddr>;
-    using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
-    using IntervalType = typename IntervalCache::interval_type;
-    IntervalCache mapped_addresses;
+    MapIntervalAllocator mapped_addresses_allocator;
+    boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
+        mapped_addresses;
 
-    static constexpr u64 write_page_bit = 11;
     std::unordered_map<u64, u32> written_pages;
+    std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
 
-    static constexpr u64 block_page_bits = 21;
-    static constexpr u64 block_page_size = 1ULL << block_page_bits;
-    std::unordered_map<u64, TBuffer> blocks;
-
-    std::list<TBuffer> pending_destruction;
+    std::queue<std::shared_ptr<Buffer>> pending_destruction;
     u64 epoch = 0;
     u64 modified_ticks = 0;
 
     std::vector<u8> staging_buffer;
 
+    std::list<MapInterval*> marked_for_unregister;
+
+    std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
+    std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
+
     std::recursive_mutex mutex;
 };
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
new file mode 100644
index 000000000..62587e18a
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.cpp
@@ -0,0 +1,33 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <memory>
+
+#include "video_core/buffer_cache/map_interval.h"
+
+namespace VideoCommon {
+
+MapIntervalAllocator::MapIntervalAllocator() {
+    FillFreeList(first_chunk);
+}
+
+MapIntervalAllocator::~MapIntervalAllocator() = default;
+
+void MapIntervalAllocator::AllocateNewChunk() {
+    *new_chunk = std::make_unique<Chunk>();
+    FillFreeList(**new_chunk);
+    new_chunk = &(*new_chunk)->next;
+}
+
+void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
+    const std::size_t old_size = free_list.size();
+    free_list.resize(old_size + chunk.data.size());
+    std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
+                   [](MapInterval& interval) { return &interval; });
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index b0956029d..fe0bcd1d8 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -4,86 +4,89 @@
 
 #pragma once
 
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include <boost/intrusive/set_hook.hpp>
+
 #include "common/common_types.h"
 #include "video_core/gpu.h"
 
 namespace VideoCommon {
 
-class MapIntervalBase {
-public:
-    MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
-        : start{start}, end{end}, gpu_addr{gpu_addr} {}
+struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
+    MapInterval() = default;
 
-    void SetCpuAddress(VAddr new_cpu_addr) {
-        cpu_addr = new_cpu_addr;
-    }
+    /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
 
-    VAddr GetCpuAddress() const {
-        return cpu_addr;
-    }
+    explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
+        : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
 
-    GPUVAddr GetGpuAddress() const {
-        return gpu_addr;
+    bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
+        return start <= other_start && other_end <= end;
     }
 
-    bool IsInside(const VAddr other_start, const VAddr other_end) const {
-        return (start <= other_start && other_end <= end);
+    bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
+        return start < other_end && other_start < end;
     }
 
-    bool operator==(const MapIntervalBase& rhs) const {
-        return std::tie(start, end) == std::tie(rhs.start, rhs.end);
-    }
-
-    bool operator!=(const MapIntervalBase& rhs) const {
-        return !operator==(rhs);
-    }
-
-    void MarkAsRegistered(const bool registered) {
-        is_registered = registered;
+    void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
+        is_modified = is_modified_;
+        ticks = ticks_;
     }
 
-    bool IsRegistered() const {
-        return is_registered;
-    }
+    boost::intrusive::set_member_hook<> member_hook_;
+
+    VAddr start = 0;
+    VAddr end = 0;
+    GPUVAddr gpu_addr = 0;
+    u64 ticks = 0;
+    bool is_written = false;
+    bool is_modified = false;
+    bool is_registered = false;
+    bool is_memory_marked = false;
+    bool is_sync_pending = false;
+};
 
-    VAddr GetStart() const {
-        return start;
+struct MapIntervalCompare {
+    constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
+        return lhs.start < rhs.start;
     }
+};
 
-    VAddr GetEnd() const {
-        return end;
+class MapIntervalAllocator {
+public:
+    MapIntervalAllocator();
+    ~MapIntervalAllocator();
+
+    MapInterval* Allocate() {
+        if (free_list.empty()) {
+            AllocateNewChunk();
+        }
+        MapInterval* const interval = free_list.back();
+        free_list.pop_back();
+        return interval;
     }
 
-    void MarkAsModified(const bool is_modified_, const u64 tick) {
-        is_modified = is_modified_;
-        ticks = tick;
+    void Release(MapInterval* interval) {
+        free_list.push_back(interval);
     }
 
-    bool IsModified() const {
-        return is_modified;
-    }
+private:
+    struct Chunk {
+        std::unique_ptr<Chunk> next;
+        std::array<MapInterval, 0x8000> data;
+    };
 
-    u64 GetModificationTick() const {
-        return ticks;
-    }
+    void AllocateNewChunk();
 
-    void MarkAsWritten(const bool is_written_) {
-        is_written = is_written_;
-    }
+    void FillFreeList(Chunk& chunk);
 
-    bool IsWritten() const {
-        return is_written;
-    }
+    std::vector<MapInterval*> free_list;
+    std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
 
-private:
-    VAddr start;
-    VAddr end;
-    GPUVAddr gpu_addr;
-    VAddr cpu_addr{};
-    bool is_written{};
-    bool is_modified{};
-    bool is_registered{};
-    u64 ticks{};
+    Chunk first_chunk;
 };
 
 } // namespace VideoCommon
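
The new MapIntervalAllocator replaces the old per-map std::make_shared allocations with a chunked object pool: maps live in fixed 0x8000-element arrays chained into a singly linked list of chunks, and a vector of free pointers hands them out. Because chunks are never moved or freed while the allocator lives, every pointer returned by Allocate() stays valid until Release(), which is exactly what the boost::intrusive set of MapInterval nodes requires. A minimal standalone sketch of the same pattern (the PoolAllocator name and the forward fill order are illustrative, not from the commit):

#include <array>
#include <cstddef>
#include <memory>
#include <vector>

template <typename T, std::size_t ChunkSize = 0x8000>
class PoolAllocator {
public:
    PoolAllocator() {
        FillFreeList(first_chunk);
    }

    T* Allocate() {
        if (free_list.empty()) {
            AllocateNewChunk();
        }
        T* const object = free_list.back();
        free_list.pop_back();
        return object;
    }

    void Release(T* object) {
        // Recycle the slot; memory is only reclaimed when the pool dies.
        free_list.push_back(object);
    }

private:
    struct Chunk {
        std::unique_ptr<Chunk> next;
        std::array<T, ChunkSize> data;
    };

    void AllocateNewChunk() {
        // Chunks are appended at the tail and never moved or freed, so
        // previously handed-out pointers remain valid (pointer stability).
        *new_chunk = std::make_unique<Chunk>();
        FillFreeList(**new_chunk);
        new_chunk = &(*new_chunk)->next;
    }

    void FillFreeList(Chunk& chunk) {
        for (T& object : chunk.data) {
            free_list.push_back(&object);
        }
    }

    std::vector<T*> free_list;
    std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
    Chunk first_chunk;
};

The commit's FillFreeList fills in reverse (rbegin/rend) so the lowest-address slot is handed out first; that detail is cosmetic and does not change the pool's behavior.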
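
GetMapsInRange in the new buffer_cache.h shows the lookup that replaced the boost::icl interval_map query: the intrusive set is ordered by start address, lower_bound() lands on the first map starting at or after the queried address, one step backwards catches a map that begins earlier but reaches into the range, and the forward scan stops once start passes the end of the range. A standalone sketch of the same lookup over std::set (illustrative names; it assumes stored intervals never overlap one another, which the cache guarantees by merging overlaps in MapAddress):

#include <cstdint>
#include <set>
#include <vector>

struct Interval {
    std::uint64_t start = 0;
    std::uint64_t end = 0; // one past the last byte, as in MapInterval
};

struct IntervalCompare {
    // Transparent comparator: allows lower_bound() on a raw address.
    using is_transparent = void;
    bool operator()(const Interval& lhs, const Interval& rhs) const {
        return lhs.start < rhs.start;
    }
    bool operator()(const Interval& lhs, std::uint64_t addr) const {
        return lhs.start < addr;
    }
    bool operator()(std::uint64_t addr, const Interval& rhs) const {
        return addr < rhs.start;
    }
};

using IntervalSet = std::set<Interval, IntervalCompare>;

std::vector<const Interval*> GetMapsInRange(const IntervalSet& maps, std::uint64_t addr,
                                            std::uint64_t size) {
    std::vector<const Interval*> result;
    if (size == 0) {
        return result;
    }
    const std::uint64_t addr_end = addr + size;
    auto it = maps.lower_bound(addr);
    if (it != maps.begin()) {
        // With non-overlapping intervals, at most one predecessor can start
        // before `addr` and still reach into the queried range.
        --it;
    }
    while (it != maps.end() && it->start < addr_end) {
        if (addr < it->end) { // overlap test; it->start < addr_end already holds
            result.push_back(&*it);
        }
        ++it;
    }
    return result;
}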
