aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/buffer_cache
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--src/video_core/buffer_cache/buffer_block.h27
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h570
-rw-r--r--src/video_core/buffer_cache/map_interval.cpp33
-rw-r--r--src/video_core/buffer_cache/map_interval.h113
4 files changed, 444 insertions, 299 deletions
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index e35ee0b67..e64170e66 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,48 +15,47 @@ namespace VideoCommon {
class BufferBlock {
public:
- bool Overlaps(const VAddr start, const VAddr end) const {
+ bool Overlaps(VAddr start, VAddr end) const {
return (cpu_addr < end) && (cpu_addr_end > start);
}
- bool IsInside(const VAddr other_start, const VAddr other_end) const {
+ bool IsInside(VAddr other_start, VAddr other_end) const {
return cpu_addr <= other_start && other_end <= cpu_addr_end;
}
- std::size_t GetOffset(const VAddr in_addr) {
+ std::size_t Offset(VAddr in_addr) const {
return static_cast<std::size_t>(in_addr - cpu_addr);
}
- VAddr GetCpuAddr() const {
+ VAddr CpuAddr() const {
return cpu_addr;
}
- VAddr GetCpuAddrEnd() const {
+ VAddr CpuAddrEnd() const {
return cpu_addr_end;
}
- void SetCpuAddr(const VAddr new_addr) {
+ void SetCpuAddr(VAddr new_addr) {
cpu_addr = new_addr;
cpu_addr_end = new_addr + size;
}
- std::size_t GetSize() const {
+ std::size_t Size() const {
return size;
}
- void SetEpoch(u64 new_epoch) {
- epoch = new_epoch;
+ u64 Epoch() const {
+ return epoch;
}
- u64 GetEpoch() {
- return epoch;
+ void SetEpoch(u64 new_epoch) {
+ epoch = new_epoch;
}
protected:
- explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
- SetCpuAddr(cpu_addr);
+ explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
+ SetCpuAddr(cpu_addr_);
}
- ~BufferBlock() = default;
private:
VAddr cpu_addr{};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b57c0d4d4..e7edd733f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -4,7 +4,7 @@
#pragma once
-#include <array>
+#include <list>
#include <memory>
#include <mutex>
#include <unordered_map>
@@ -12,14 +12,17 @@
#include <utility>
#include <vector>
-#include <boost/icl/interval_map.hpp>
+#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_set.hpp>
-#include <boost/range/iterator_range.hpp>
+#include <boost/intrusive/set.hpp>
#include "common/alignment.h"
+#include "common/assert.h"
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
+#include "core/settings.h"
#include "video_core/buffer_cache/buffer_block.h"
#include "video_core/buffer_cache/map_interval.h"
#include "video_core/memory_manager.h"
@@ -27,105 +30,122 @@
namespace VideoCommon {
-using MapInterval = std::shared_ptr<MapIntervalBase>;
-
-template <typename TBuffer, typename TBufferType, typename StreamBuffer>
+template <typename Buffer, typename BufferType, typename StreamBuffer>
class BufferCache {
+ using IntervalSet = boost::icl::interval_set<VAddr>;
+ using IntervalType = typename IntervalSet::interval_type;
+ using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
+
+ static constexpr u64 WRITE_PAGE_BIT = 11;
+ static constexpr u64 BLOCK_PAGE_BITS = 21;
+ static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
+
public:
- using BufferInfo = std::pair<const TBufferType*, u64>;
+ struct BufferInfo {
+ BufferType handle;
+ u64 offset;
+ u64 address;
+ };
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex};
- const std::optional<VAddr> cpu_addr_opt =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
-
- if (!cpu_addr_opt) {
- return {GetEmptyBuffer(size), 0};
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) {
+ return GetEmptyBuffer(size);
}
- VAddr cpu_addr = *cpu_addr_opt;
-
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
constexpr std::size_t max_stream_size = 0x800;
if (use_fast_cbuf || size < max_stream_size) {
- if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
- auto& memory_manager = system.GPU().MemoryManager();
+ if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) {
+ const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size);
if (use_fast_cbuf) {
- if (memory_manager.IsGranularRange(gpu_addr, size)) {
- const auto host_ptr = memory_manager.GetPointer(gpu_addr);
- return ConstBufferUpload(host_ptr, size);
+ u8* dest;
+ if (is_granular) {
+ dest = gpu_memory.GetPointer(gpu_addr);
} else {
staging_buffer.resize(size);
- memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
- return ConstBufferUpload(staging_buffer.data(), size);
+ dest = staging_buffer.data();
+ gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
}
+ return ConstBufferUpload(dest, size);
+ }
+ if (is_granular) {
+ u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
+ return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
+ std::memcpy(dest, host_ptr, size);
+ });
} else {
- if (memory_manager.IsGranularRange(gpu_addr, size)) {
- const auto host_ptr = memory_manager.GetPointer(gpu_addr);
- return StreamBufferUpload(host_ptr, size, alignment);
- } else {
- staging_buffer.resize(size);
- memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
- return StreamBufferUpload(staging_buffer.data(), size, alignment);
- }
+ return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) {
+ gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
+ });
}
}
}
- auto block = GetBlock(cpu_addr, size);
- auto map = MapAddress(block, gpu_addr, cpu_addr, size);
+ Buffer* const block = GetBlock(*cpu_addr, size);
+ MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size);
+ if (!map) {
+ return GetEmptyBuffer(size);
+ }
if (is_written) {
map->MarkAsModified(true, GetModifiedTicks());
- if (!map->IsWritten()) {
- map->MarkAsWritten(true);
- MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+ if (Settings::IsGPULevelHigh() &&
+ Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
+ MarkForAsyncFlush(map);
}
- } else {
- if (map->IsWritten()) {
- WriteBarrier();
+ if (!map->is_written) {
+ map->is_written = true;
+ MarkRegionAsWritten(map->start, map->end - 1);
}
}
- const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
-
- return {ToHandle(block), offset};
+ return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()};
}
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
std::size_t alignment = 4) {
std::lock_guard lock{mutex};
- return StreamBufferUpload(raw_pointer, size, alignment);
+ return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
+ std::memcpy(dest, raw_pointer, size);
+ });
}
- void Map(std::size_t max_size) {
+ /// Prepares the buffer cache for data uploading
+ /// @param max_size Maximum number of bytes that will be uploaded
+ /// @return True when a stream buffer invalidation was required, false otherwise
+ bool Map(std::size_t max_size) {
std::lock_guard lock{mutex};
+ bool invalidated;
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
buffer_offset = buffer_offset_base;
+
+ return invalidated;
}
- /// Finishes the upload stream, returns true on bindings invalidation.
- bool Unmap() {
+ /// Finishes the upload stream
+ void Unmap() {
std::lock_guard lock{mutex};
-
stream_buffer->Unmap(buffer_offset - buffer_offset_base);
- return std::exchange(invalidated, false);
}
+ /// Function called at the end of each frame, inteded for deferred operations
void TickFrame() {
++epoch;
+
while (!pending_destruction.empty()) {
// Delay at least 4 frames before destruction.
// This is due to triple buffering happening on some drivers.
static constexpr u64 epochs_to_destroy = 5;
- if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
+ if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
break;
}
- pending_destruction.pop_front();
+ pending_destruction.pop();
}
}
@@ -133,117 +153,193 @@ public:
void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
- std::vector<MapInterval> objects = GetMapsInRange(addr, size);
- std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) {
- return a->GetModificationTick() < b->GetModificationTick();
- });
- for (auto& object : objects) {
- if (object->IsModified() && object->IsRegistered()) {
+ VectorMapInterval objects = GetMapsInRange(addr, size);
+ std::sort(objects.begin(), objects.end(),
+ [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
+ for (MapInterval* object : objects) {
+ if (object->is_modified && object->is_registered) {
+ mutex.unlock();
FlushMap(object);
+ mutex.lock();
}
}
}
+ bool MustFlushRegion(VAddr addr, std::size_t size) {
+ std::lock_guard lock{mutex};
+
+ const VectorMapInterval objects = GetMapsInRange(addr, size);
+ return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
+ return map->is_modified && map->is_registered;
+ });
+ }
+
/// Mark the specified region as being invalidated
void InvalidateRegion(VAddr addr, u64 size) {
std::lock_guard lock{mutex};
- std::vector<MapInterval> objects = GetMapsInRange(addr, size);
- for (auto& object : objects) {
- if (object->IsRegistered()) {
+ for (auto& object : GetMapsInRange(addr, size)) {
+ if (object->is_registered) {
Unregister(object);
}
}
}
- virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0;
+ void OnCPUWrite(VAddr addr, std::size_t size) {
+ std::lock_guard lock{mutex};
-protected:
- explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- std::unique_ptr<StreamBuffer> stream_buffer)
- : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
- stream_buffer_handle{this->stream_buffer->GetHandle()} {}
+ for (MapInterval* object : GetMapsInRange(addr, size)) {
+ if (object->is_memory_marked && object->is_registered) {
+ UnmarkMemory(object);
+ object->is_sync_pending = true;
+ marked_for_unregister.emplace_back(object);
+ }
+ }
+ }
- ~BufferCache() = default;
+ void SyncGuestHost() {
+ std::lock_guard lock{mutex};
+
+ for (auto& object : marked_for_unregister) {
+ if (object->is_registered) {
+ object->is_sync_pending = false;
+ Unregister(object);
+ }
+ }
+ marked_for_unregister.clear();
+ }
+
+ void CommitAsyncFlushes() {
+ if (uncommitted_flushes) {
+ auto commit_list = std::make_shared<std::list<MapInterval*>>();
+ for (MapInterval* map : *uncommitted_flushes) {
+ if (map->is_registered && map->is_modified) {
+ // TODO(Blinkhawk): Implement backend asynchronous flushing
+ // AsyncFlushMap(map)
+ commit_list->push_back(map);
+ }
+ }
+ if (!commit_list->empty()) {
+ committed_flushes.push_back(commit_list);
+ } else {
+ committed_flushes.emplace_back();
+ }
+ } else {
+ committed_flushes.emplace_back();
+ }
+ uncommitted_flushes.reset();
+ }
- virtual const TBufferType* ToHandle(const TBuffer& storage) = 0;
+ bool ShouldWaitAsyncFlushes() const {
+ return !committed_flushes.empty() && committed_flushes.front() != nullptr;
+ }
- virtual void WriteBarrier() = 0;
+ bool HasUncommittedFlushes() const {
+ return uncommitted_flushes != nullptr;
+ }
+
+ void PopAsyncFlushes() {
+ if (committed_flushes.empty()) {
+ return;
+ }
+ auto& flush_list = committed_flushes.front();
+ if (!flush_list) {
+ committed_flushes.pop_front();
+ return;
+ }
+ for (MapInterval* map : *flush_list) {
+ if (map->is_registered) {
+ // TODO(Blinkhawk): Replace this for reading the asynchronous flush
+ FlushMap(map);
+ }
+ }
+ committed_flushes.pop_front();
+ }
- virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
+ virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
- virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) = 0;
+protected:
+ explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+ std::unique_ptr<StreamBuffer> stream_buffer_)
+ : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
+ stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
- virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
- u8* data) = 0;
+ ~BufferCache() = default;
- virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) = 0;
+ virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
return {};
}
/// Register an object into the cache
- void Register(const MapInterval& new_map, bool inherit_written = false) {
- const VAddr cpu_addr = new_map->GetStart();
+ MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
+ const VAddr cpu_addr = new_map.start;
if (!cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
- new_map->GetGpuAddress());
- return;
+ new_map.gpu_addr);
+ return nullptr;
}
- const std::size_t size = new_map->GetEnd() - new_map->GetStart();
- new_map->MarkAsRegistered(true);
- const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
- mapped_addresses.insert({interval, new_map});
+ const std::size_t size = new_map.end - new_map.start;
+ new_map.is_registered = true;
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+ new_map.is_memory_marked = true;
if (inherit_written) {
- MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
- new_map->MarkAsWritten(true);
+ MarkRegionAsWritten(new_map.start, new_map.end - 1);
+ new_map.is_written = true;
}
+ MapInterval* const storage = mapped_addresses_allocator.Allocate();
+ *storage = new_map;
+ mapped_addresses.insert(*storage);
+ return storage;
}
- /// Unregisters an object from the cache
- void Unregister(MapInterval& map) {
- const std::size_t size = map->GetEnd() - map->GetStart();
- rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
- map->MarkAsRegistered(false);
- if (map->IsWritten()) {
- UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+ void UnmarkMemory(MapInterval* map) {
+ if (!map->is_memory_marked) {
+ return;
}
- const IntervalType delete_interval{map->GetStart(), map->GetEnd()};
- mapped_addresses.erase(delete_interval);
+ const std::size_t size = map->end - map->start;
+ rasterizer.UpdatePagesCachedCount(map->start, size, -1);
+ map->is_memory_marked = false;
}
-private:
- MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
- return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
+ /// Unregisters an object from the cache
+ void Unregister(MapInterval* map) {
+ UnmarkMemory(map);
+ map->is_registered = false;
+ if (map->is_sync_pending) {
+ map->is_sync_pending = false;
+ marked_for_unregister.remove(map);
+ }
+ if (map->is_written) {
+ UnmarkRegionAsWritten(map->start, map->end - 1);
+ }
+ const auto it = mapped_addresses.find(*map);
+ ASSERT(it != mapped_addresses.end());
+ mapped_addresses.erase(it);
+ mapped_addresses_allocator.Release(map);
}
- MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
- const std::size_t size) {
-
- std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
+private:
+ MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
+ const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
if (overlaps.empty()) {
- auto& memory_manager = system.GPU().MemoryManager();
const VAddr cpu_addr_end = cpu_addr + size;
- MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
- if (memory_manager.IsGranularRange(gpu_addr, size)) {
- u8* host_ptr = memory_manager.GetPointer(gpu_addr);
- UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
+ if (gpu_memory.IsGranularRange(gpu_addr, size)) {
+ u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
+ block->Upload(block->Offset(cpu_addr), size, host_ptr);
} else {
staging_buffer.resize(size);
- memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
- UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
+ gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+ block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
}
- Register(new_map);
- return new_map;
+ return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
}
const VAddr cpu_addr_end = cpu_addr + size;
if (overlaps.size() == 1) {
- MapInterval& current_map = overlaps[0];
+ MapInterval* const current_map = overlaps[0];
if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
return current_map;
}
@@ -253,57 +349,70 @@ private:
bool write_inheritance = false;
bool modified_inheritance = false;
// Calculate new buffer parameters
- for (auto& overlap : overlaps) {
- new_start = std::min(overlap->GetStart(), new_start);
- new_end = std::max(overlap->GetEnd(), new_end);
- write_inheritance |= overlap->IsWritten();
- modified_inheritance |= overlap->IsModified();
+ for (MapInterval* overlap : overlaps) {
+ new_start = std::min(overlap->start, new_start);
+ new_end = std::max(overlap->end, new_end);
+ write_inheritance |= overlap->is_written;
+ modified_inheritance |= overlap->is_modified;
}
GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
for (auto& overlap : overlaps) {
Unregister(overlap);
}
UpdateBlock(block, new_start, new_end, overlaps);
- MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
+
+ const MapInterval new_map{new_start, new_end, new_gpu_addr};
+ MapInterval* const map = Register(new_map, write_inheritance);
+ if (!map) {
+ return nullptr;
+ }
if (modified_inheritance) {
- new_map->MarkAsModified(true, GetModifiedTicks());
+ map->MarkAsModified(true, GetModifiedTicks());
+ if (Settings::IsGPULevelHigh() &&
+ Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
+ MarkForAsyncFlush(map);
+ }
}
- Register(new_map, write_inheritance);
- return new_map;
+ return map;
}
- void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
- std::vector<MapInterval>& overlaps) {
+ void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) {
const IntervalType base_interval{start, end};
IntervalSet interval_set{};
interval_set.add(base_interval);
for (auto& overlap : overlaps) {
- const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()};
+ const IntervalType subtract{overlap->start, overlap->end};
interval_set.subtract(subtract);
}
for (auto& interval : interval_set) {
- std::size_t size = interval.upper() - interval.lower();
- if (size > 0) {
- staging_buffer.resize(size);
- system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
- UploadBlockData(block, block->GetOffset(interval.lower()), size,
- staging_buffer.data());
+ const std::size_t size = interval.upper() - interval.lower();
+ if (size == 0) {
+ continue;
}
+ staging_buffer.resize(size);
+ cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+ block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
}
}
- std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
+ VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) {
+ VectorMapInterval result;
if (size == 0) {
- return {};
+ return result;
}
- std::vector<MapInterval> objects{};
- const IntervalType interval{addr, addr + size};
- for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) {
- objects.push_back(pair.second);
+ const VAddr addr_end = addr + size;
+ auto it = mapped_addresses.lower_bound(addr);
+ if (it != mapped_addresses.begin()) {
+ --it;
}
-
- return objects;
+ while (it != mapped_addresses.end() && it->start < addr_end) {
+ if (it->Overlaps(addr, addr_end)) {
+ result.push_back(&*it);
+ }
+ ++it;
+ }
+ return result;
}
/// Returns a ticks counter used for tracking when cached objects were last modified
@@ -311,24 +420,28 @@ private:
return ++modified_ticks;
}
- void FlushMap(MapInterval map) {
- std::size_t size = map->GetEnd() - map->GetStart();
- TBuffer block = blocks[map->GetStart() >> block_page_bits];
+ void FlushMap(MapInterval* map) {
+ const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
+ ASSERT_OR_EXECUTE(it != blocks.end(), return;);
+
+ std::shared_ptr<Buffer> block = it->second;
+
+ const std::size_t size = map->end - map->start;
staging_buffer.resize(size);
- DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
- system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
+ block->Download(block->Offset(map->start), size, staging_buffer.data());
+ cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size);
map->MarkAsModified(false, 0);
}
- BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
- std::size_t alignment) {
+ template <typename Callable>
+ BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
AlignBuffer(alignment);
const std::size_t uploaded_offset = buffer_offset;
- std::memcpy(buffer_ptr, raw_pointer, size);
+ callable(buffer_ptr);
buffer_ptr += size;
buffer_offset += size;
- return {&stream_buffer_handle, uploaded_offset};
+ return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
}
void AlignBuffer(std::size_t alignment) {
@@ -338,151 +451,148 @@ private:
buffer_offset = offset_aligned;
}
- TBuffer EnlargeBlock(TBuffer buffer) {
- const std::size_t old_size = buffer->GetSize();
- const std::size_t new_size = old_size + block_page_size;
- const VAddr cpu_addr = buffer->GetCpuAddr();
- TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
- CopyBlock(buffer, new_buffer, 0, 0, old_size);
- buffer->SetEpoch(epoch);
- pending_destruction.push_back(buffer);
+ std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
+ const std::size_t old_size = buffer->Size();
+ const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
+ const VAddr cpu_addr = buffer->CpuAddr();
+ std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
+ new_buffer->CopyFrom(*buffer, 0, 0, old_size);
+ QueueDestruction(std::move(buffer));
+
const VAddr cpu_addr_end = cpu_addr + new_size - 1;
- u64 page_start = cpu_addr >> block_page_bits;
- const u64 page_end = cpu_addr_end >> block_page_bits;
- while (page_start <= page_end) {
- blocks[page_start] = new_buffer;
- ++page_start;
+ const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+ for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
+ blocks.insert_or_assign(page_start, new_buffer);
}
+
return new_buffer;
}
- TBuffer MergeBlocks(TBuffer first, TBuffer second) {
- const std::size_t size_1 = first->GetSize();
- const std::size_t size_2 = second->GetSize();
- const VAddr first_addr = first->GetCpuAddr();
- const VAddr second_addr = second->GetCpuAddr();
+ std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
+ std::shared_ptr<Buffer> second) {
+ const std::size_t size_1 = first->Size();
+ const std::size_t size_2 = second->Size();
+ const VAddr first_addr = first->CpuAddr();
+ const VAddr second_addr = second->CpuAddr();
const VAddr new_addr = std::min(first_addr, second_addr);
const std::size_t new_size = size_1 + size_2;
- TBuffer new_buffer = CreateBlock(new_addr, new_size);
- CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
- CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
- first->SetEpoch(epoch);
- second->SetEpoch(epoch);
- pending_destruction.push_back(first);
- pending_destruction.push_back(second);
+
+ std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
+ new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
+ new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
+ QueueDestruction(std::move(first));
+ QueueDestruction(std::move(second));
+
const VAddr cpu_addr_end = new_addr + new_size - 1;
- u64 page_start = new_addr >> block_page_bits;
- const u64 page_end = cpu_addr_end >> block_page_bits;
- while (page_start <= page_end) {
- blocks[page_start] = new_buffer;
- ++page_start;
+ const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+ for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
+ blocks.insert_or_assign(page_start, new_buffer);
}
return new_buffer;
}
- TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
- TBuffer found{};
+ Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
+ std::shared_ptr<Buffer> found;
+
const VAddr cpu_addr_end = cpu_addr + size - 1;
- u64 page_start = cpu_addr >> block_page_bits;
- const u64 page_end = cpu_addr_end >> block_page_bits;
- while (page_start <= page_end) {
+ const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+ for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
auto it = blocks.find(page_start);
if (it == blocks.end()) {
if (found) {
found = EnlargeBlock(found);
- } else {
- const VAddr start_addr = (page_start << block_page_bits);
- found = CreateBlock(start_addr, block_page_size);
- blocks[page_start] = found;
- }
- } else {
- if (found) {
- if (found == it->second) {
- ++page_start;
- continue;
- }
- found = MergeBlocks(found, it->second);
- } else {
- found = it->second;
+ continue;
}
+ const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
+ found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
+ blocks.insert_or_assign(page_start, found);
+ continue;
+ }
+ if (!found) {
+ found = it->second;
+ continue;
+ }
+ if (found != it->second) {
+ found = MergeBlocks(std::move(found), it->second);
}
- ++page_start;
}
- return found;
+ return found.get();
}
- void MarkRegionAsWritten(const VAddr start, const VAddr end) {
- u64 page_start = start >> write_page_bit;
- const u64 page_end = end >> write_page_bit;
- while (page_start <= page_end) {
- auto it = written_pages.find(page_start);
- if (it != written_pages.end()) {
- it->second = it->second + 1;
- } else {
- written_pages[page_start] = 1;
+ void MarkRegionAsWritten(VAddr start, VAddr end) {
+ const u64 page_end = end >> WRITE_PAGE_BIT;
+ for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
+ if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) {
+ ++it->second;
}
- page_start++;
}
}
- void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
- u64 page_start = start >> write_page_bit;
- const u64 page_end = end >> write_page_bit;
- while (page_start <= page_end) {
+ void UnmarkRegionAsWritten(VAddr start, VAddr end) {
+ const u64 page_end = end >> WRITE_PAGE_BIT;
+ for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
auto it = written_pages.find(page_start);
if (it != written_pages.end()) {
if (it->second > 1) {
- it->second = it->second - 1;
+ --it->second;
} else {
written_pages.erase(it);
}
}
- page_start++;
}
}
- bool IsRegionWritten(const VAddr start, const VAddr end) const {
- u64 page_start = start >> write_page_bit;
- const u64 page_end = end >> write_page_bit;
- while (page_start <= page_end) {
+ bool IsRegionWritten(VAddr start, VAddr end) const {
+ const u64 page_end = end >> WRITE_PAGE_BIT;
+ for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
if (written_pages.count(page_start) > 0) {
return true;
}
- page_start++;
}
return false;
}
+ void QueueDestruction(std::shared_ptr<Buffer> buffer) {
+ buffer->SetEpoch(epoch);
+ pending_destruction.push(std::move(buffer));
+ }
+
+ void MarkForAsyncFlush(MapInterval* map) {
+ if (!uncommitted_flushes) {
+ uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
+ }
+ uncommitted_flushes->insert(map);
+ }
+
VideoCore::RasterizerInterface& rasterizer;
- Core::System& system;
+ Tegra::MemoryManager& gpu_memory;
+ Core::Memory::Memory& cpu_memory;
std::unique_ptr<StreamBuffer> stream_buffer;
- TBufferType stream_buffer_handle{};
-
- bool invalidated = false;
+ BufferType stream_buffer_handle;
u8* buffer_ptr = nullptr;
u64 buffer_offset = 0;
u64 buffer_offset_base = 0;
- using IntervalSet = boost::icl::interval_set<VAddr>;
- using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
- using IntervalType = typename IntervalCache::interval_type;
- IntervalCache mapped_addresses;
+ MapIntervalAllocator mapped_addresses_allocator;
+ boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
+ mapped_addresses;
- static constexpr u64 write_page_bit = 11;
std::unordered_map<u64, u32> written_pages;
+ std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
- static constexpr u64 block_page_bits = 21;
- static constexpr u64 block_page_size = 1ULL << block_page_bits;
- std::unordered_map<u64, TBuffer> blocks;
-
- std::list<TBuffer> pending_destruction;
+ std::queue<std::shared_ptr<Buffer>> pending_destruction;
u64 epoch = 0;
u64 modified_ticks = 0;
std::vector<u8> staging_buffer;
+ std::list<MapInterval*> marked_for_unregister;
+
+ std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
+ std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
+
std::recursive_mutex mutex;
};
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
new file mode 100644
index 000000000..62587e18a
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.cpp
@@ -0,0 +1,33 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <memory>
+
+#include "video_core/buffer_cache/map_interval.h"
+
+namespace VideoCommon {
+
+MapIntervalAllocator::MapIntervalAllocator() {
+ FillFreeList(first_chunk);
+}
+
+MapIntervalAllocator::~MapIntervalAllocator() = default;
+
+void MapIntervalAllocator::AllocateNewChunk() {
+ *new_chunk = std::make_unique<Chunk>();
+ FillFreeList(**new_chunk);
+ new_chunk = &(*new_chunk)->next;
+}
+
+void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
+ const std::size_t old_size = free_list.size();
+ free_list.resize(old_size + chunk.data.size());
+ std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
+ [](MapInterval& interval) { return &interval; });
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index b0956029d..fe0bcd1d8 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -4,86 +4,89 @@
#pragma once
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include <boost/intrusive/set_hook.hpp>
+
#include "common/common_types.h"
#include "video_core/gpu.h"
namespace VideoCommon {
-class MapIntervalBase {
-public:
- MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
- : start{start}, end{end}, gpu_addr{gpu_addr} {}
+struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
+ MapInterval() = default;
- void SetCpuAddress(VAddr new_cpu_addr) {
- cpu_addr = new_cpu_addr;
- }
+ /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
- VAddr GetCpuAddress() const {
- return cpu_addr;
- }
+ explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
+ : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
- GPUVAddr GetGpuAddress() const {
- return gpu_addr;
+ bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
+ return start <= other_start && other_end <= end;
}
- bool IsInside(const VAddr other_start, const VAddr other_end) const {
- return (start <= other_start && other_end <= end);
+ bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
+ return start < other_end && other_start < end;
}
- bool operator==(const MapIntervalBase& rhs) const {
- return std::tie(start, end) == std::tie(rhs.start, rhs.end);
- }
-
- bool operator!=(const MapIntervalBase& rhs) const {
- return !operator==(rhs);
- }
-
- void MarkAsRegistered(const bool registered) {
- is_registered = registered;
+ void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
+ is_modified = is_modified_;
+ ticks = ticks_;
}
- bool IsRegistered() const {
- return is_registered;
- }
+ boost::intrusive::set_member_hook<> member_hook_;
+ VAddr start = 0;
+ VAddr end = 0;
+ GPUVAddr gpu_addr = 0;
+ u64 ticks = 0;
+ bool is_written = false;
+ bool is_modified = false;
+ bool is_registered = false;
+ bool is_memory_marked = false;
+ bool is_sync_pending = false;
+};
- VAddr GetStart() const {
- return start;
+struct MapIntervalCompare {
+ constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
+ return lhs.start < rhs.start;
}
+};
- VAddr GetEnd() const {
- return end;
+class MapIntervalAllocator {
+public:
+ MapIntervalAllocator();
+ ~MapIntervalAllocator();
+
+ MapInterval* Allocate() {
+ if (free_list.empty()) {
+ AllocateNewChunk();
+ }
+ MapInterval* const interval = free_list.back();
+ free_list.pop_back();
+ return interval;
}
- void MarkAsModified(const bool is_modified_, const u64 tick) {
- is_modified = is_modified_;
- ticks = tick;
+ void Release(MapInterval* interval) {
+ free_list.push_back(interval);
}
- bool IsModified() const {
- return is_modified;
- }
+private:
+ struct Chunk {
+ std::unique_ptr<Chunk> next;
+ std::array<MapInterval, 0x8000> data;
+ };
- u64 GetModificationTick() const {
- return ticks;
- }
+ void AllocateNewChunk();
- void MarkAsWritten(const bool is_written_) {
- is_written = is_written_;
- }
+ void FillFreeList(Chunk& chunk);
- bool IsWritten() const {
- return is_written;
- }
+ std::vector<MapInterval*> free_list;
+ std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
-private:
- VAddr start;
- VAddr end;
- GPUVAddr gpu_addr;
- VAddr cpu_addr{};
- bool is_written{};
- bool is_modified{};
- bool is_registered{};
- u64 ticks{};
+ Chunk first_chunk;
};
} // namespace VideoCommon