From 3b2dee88e67eaa968fd0dba19d10871e0b6570b9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 27 May 2020 23:05:50 -0300 Subject: buffer_cache: Avoid copying twice on certain cases Avoid copying to a staging buffer on non-granular memory addresses. Add a callable argument to StreamBufferUpload to be able to copy to the staging buffer directly from ReadBlockUnsafe. --- src/video_core/buffer_cache/buffer_cache.h | 40 +++++++++++++++++------------- 1 file changed, 23 insertions(+), 17 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d9a4a1b4d..b88fce2cd 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -56,24 +56,28 @@ public: if (use_fast_cbuf || size < max_stream_size) { if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { auto& memory_manager = system.GPU().MemoryManager(); + const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size); if (use_fast_cbuf) { - if (memory_manager.IsGranularRange(gpu_addr, size)) { - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - return ConstBufferUpload(host_ptr, size); + u8* dest; + if (is_granular) { + dest = memory_manager.GetPointer(gpu_addr); } else { staging_buffer.resize(size); - memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); - return ConstBufferUpload(staging_buffer.data(), size); + dest = staging_buffer.data(); + memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); } + return ConstBufferUpload(dest, size); + } + if (is_granular) { + u8* const host_ptr = memory_manager.GetPointer(gpu_addr); + return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) { + std::memcpy(dest, host_ptr, size); + }); } else { - if (memory_manager.IsGranularRange(gpu_addr, size)) { - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - return StreamBufferUpload(host_ptr, size, alignment); - } else { - staging_buffer.resize(size); - memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); - return StreamBufferUpload(staging_buffer.data(), size, alignment); - } + return StreamBufferUpload( + size, alignment, [&memory_manager, gpu_addr, size](u8* dest) { + memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); + }); } } } @@ -101,7 +105,9 @@ public: BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4) { std::lock_guard lock{mutex}; - return StreamBufferUpload(raw_pointer, size, alignment); + return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) { + std::memcpy(dest, raw_pointer, size); + }); } void Map(std::size_t max_size) { @@ -424,11 +430,11 @@ private: map->MarkAsModified(false, 0); } - BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, - std::size_t alignment) { + template + BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) { AlignBuffer(alignment); const std::size_t uploaded_offset = buffer_offset; - std::memcpy(buffer_ptr, raw_pointer, size); + callable(buffer_ptr); buffer_ptr += size; buffer_offset += size; -- cgit v1.2.3 From 6e122f0b2c3fe6a2cccd15256a04bcd4342c86f7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Jun 2020 20:22:31 -0300 Subject: buffer_cache: Return stream buffer invalidation in Map instead of Unmap We have to invalidate whatever cache is being used before uploading the data, hence it makes more sense to return this on Map instead of Unmap. --- src/video_core/buffer_cache/buffer_cache.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b88fce2cd..77ae34339 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -110,19 +110,23 @@ public: }); } - void Map(std::size_t max_size) { + /// Prepares the buffer cache for data uploading + /// @param max_size Maximum number of bytes that will be uploaded + /// @return True when a stream buffer invalidation was required, false otherwise + bool Map(std::size_t max_size) { std::lock_guard lock{mutex}; + bool invalidated; std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); buffer_offset = buffer_offset_base; + + return invalidated; } - /// Finishes the upload stream, returns true on bindings invalidation. - bool Unmap() { + /// Finishes the upload stream + void Unmap() { std::lock_guard lock{mutex}; - stream_buffer->Unmap(buffer_offset - buffer_offset_base); - return std::exchange(invalidated, false); } void TickFrame() { @@ -576,8 +580,6 @@ private: std::unique_ptr stream_buffer; BufferType stream_buffer_handle{}; - bool invalidated = false; - u8* buffer_ptr = nullptr; u64 buffer_offset = 0; u64 buffer_offset_base = 0; -- cgit v1.2.3 From 6508cdd00351e51c7d5867c00da60781c133ade8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 9 Jun 2020 18:27:59 -0300 Subject: buffer_cache: Avoid passing references of shared pointers and misc style changes Instead of using as template argument a shared pointer, use the underlying type and manage shared pointers explicitly. This can make removing shared pointers from the cache more easy. While we are at it, make some misc style changes and general improvements (like insert_or_assign instead of operator[] + operator=). --- src/video_core/buffer_cache/buffer_block.h | 27 ++- src/video_core/buffer_cache/buffer_cache.h | 199 ++++++++++----------- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 21 +-- src/video_core/renderer_opengl/gl_buffer_cache.h | 18 +- .../renderer_opengl/gl_stream_buffer.cpp | 8 - src/video_core/renderer_opengl/gl_stream_buffer.h | 11 +- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 22 +-- src/video_core/renderer_vulkan/vk_buffer_cache.h | 16 +- src/video_core/renderer_vulkan/vk_stream_buffer.h | 2 +- 9 files changed, 150 insertions(+), 174 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h index e35ee0b67..e64170e66 100644 --- a/src/video_core/buffer_cache/buffer_block.h +++ b/src/video_core/buffer_cache/buffer_block.h @@ -15,48 +15,47 @@ namespace VideoCommon { class BufferBlock { public: - bool Overlaps(const VAddr start, const VAddr end) const { + bool Overlaps(VAddr start, VAddr end) const { return (cpu_addr < end) && (cpu_addr_end > start); } - bool IsInside(const VAddr other_start, const VAddr other_end) const { + bool IsInside(VAddr other_start, VAddr other_end) const { return cpu_addr <= other_start && other_end <= cpu_addr_end; } - std::size_t GetOffset(const VAddr in_addr) { + std::size_t Offset(VAddr in_addr) const { return static_cast(in_addr - cpu_addr); } - VAddr GetCpuAddr() const { + VAddr CpuAddr() const { return cpu_addr; } - VAddr GetCpuAddrEnd() const { + VAddr CpuAddrEnd() const { return cpu_addr_end; } - void SetCpuAddr(const VAddr new_addr) { + void SetCpuAddr(VAddr new_addr) { cpu_addr = new_addr; cpu_addr_end = new_addr + size; } - std::size_t GetSize() const { + std::size_t Size() const { return size; } - void SetEpoch(u64 new_epoch) { - epoch = new_epoch; + u64 Epoch() const { + return epoch; } - u64 GetEpoch() { - return epoch; + void SetEpoch(u64 new_epoch) { + epoch = new_epoch; } protected: - explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} { - SetCpuAddr(cpu_addr); + explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} { + SetCpuAddr(cpu_addr_); } - ~BufferBlock() = default; private: VAddr cpu_addr{}; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b88fce2cd..efc480d08 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -30,12 +30,16 @@ namespace VideoCommon { -template +template class BufferCache { using IntervalSet = boost::icl::interval_set; using IntervalType = typename IntervalSet::interval_type; using VectorMapInterval = boost::container::small_vector; + static constexpr u64 WRITE_PAGE_BIT = 11; + static constexpr u64 BLOCK_PAGE_BITS = 21; + static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; + public: using BufferInfo = std::pair; @@ -82,7 +86,7 @@ public: } } - OwnerBuffer block = GetBlock(cpu_addr, size); + Buffer* const block = GetBlock(cpu_addr, size); MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); if (!map) { return {GetEmptyBuffer(size), 0}; @@ -98,7 +102,7 @@ public: } } - return {ToHandle(block), static_cast(block->GetOffset(cpu_addr))}; + return {block->Handle(), static_cast(block->Offset(cpu_addr))}; } /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. @@ -125,16 +129,18 @@ public: return std::exchange(invalidated, false); } + /// Function called at the end of each frame, inteded for deferred operations void TickFrame() { ++epoch; + while (!pending_destruction.empty()) { // Delay at least 4 frames before destruction. // This is due to triple buffering happening on some drivers. static constexpr u64 epochs_to_destroy = 5; - if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) { + if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) { break; } - pending_destruction.pop_front(); + pending_destruction.pop(); } } @@ -249,23 +255,21 @@ public: protected: explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - std::unique_ptr stream_buffer) - : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)}, - stream_buffer_handle{this->stream_buffer->GetHandle()} {} + std::unique_ptr stream_buffer_) + : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)}, + stream_buffer_handle{stream_buffer->Handle()} {} ~BufferCache() = default; - virtual BufferType ToHandle(const OwnerBuffer& storage) = 0; + virtual std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) = 0; - virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; - - virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, + virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) = 0; - virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, + virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, u8* data) = 0; - virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset, + virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) = 0; virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { @@ -321,7 +325,7 @@ protected: } private: - MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr, + MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { @@ -329,11 +333,11 @@ private: const VAddr cpu_addr_end = cpu_addr + size; if (memory_manager.IsGranularRange(gpu_addr, size)) { u8* host_ptr = memory_manager.GetPointer(gpu_addr); - UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); + UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr); } else { staging_buffer.resize(size); memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); - UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); + UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data()); } return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); } @@ -376,7 +380,7 @@ private: return map; } - void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, + void UpdateBlock(const Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) { const IntervalType base_interval{start, end}; IntervalSet interval_set{}; @@ -386,13 +390,13 @@ private: interval_set.subtract(subtract); } for (auto& interval : interval_set) { - std::size_t size = interval.upper() - interval.lower(); - if (size > 0) { - staging_buffer.resize(size); - system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); - UploadBlockData(block, block->GetOffset(interval.lower()), size, - staging_buffer.data()); + const std::size_t size = interval.upper() - interval.lower(); + if (size == 0) { + continue; } + staging_buffer.resize(size); + system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); + UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data()); } } @@ -422,10 +426,14 @@ private: } void FlushMap(MapInterval* map) { + const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS); + ASSERT_OR_EXECUTE(it != blocks.end(), return;); + + std::shared_ptr block = it->second; + const std::size_t size = map->end - map->start; - OwnerBuffer block = blocks[map->start >> block_page_bits]; staging_buffer.resize(size); - DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data()); + DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data()); system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); map->MarkAsModified(false, 0); } @@ -448,97 +456,89 @@ private: buffer_offset = offset_aligned; } - OwnerBuffer EnlargeBlock(OwnerBuffer buffer) { - const std::size_t old_size = buffer->GetSize(); - const std::size_t new_size = old_size + block_page_size; - const VAddr cpu_addr = buffer->GetCpuAddr(); - OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size); - CopyBlock(buffer, new_buffer, 0, 0, old_size); - buffer->SetEpoch(epoch); - pending_destruction.push_back(buffer); + std::shared_ptr EnlargeBlock(std::shared_ptr buffer) { + const std::size_t old_size = buffer->Size(); + const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; + const VAddr cpu_addr = buffer->CpuAddr(); + std::shared_ptr new_buffer = CreateBlock(cpu_addr, new_size); + CopyBlock(*buffer, *new_buffer, 0, 0, old_size); + QueueDestruction(std::move(buffer)); + const VAddr cpu_addr_end = cpu_addr + new_size - 1; - u64 page_start = cpu_addr >> block_page_bits; - const u64 page_end = cpu_addr_end >> block_page_bits; - while (page_start <= page_end) { - blocks[page_start] = new_buffer; - ++page_start; + const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; + for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { + blocks.insert_or_assign(page_start, new_buffer); } + return new_buffer; } - OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) { - const std::size_t size_1 = first->GetSize(); - const std::size_t size_2 = second->GetSize(); - const VAddr first_addr = first->GetCpuAddr(); - const VAddr second_addr = second->GetCpuAddr(); + std::shared_ptr MergeBlocks(std::shared_ptr first, + std::shared_ptr second) { + const std::size_t size_1 = first->Size(); + const std::size_t size_2 = second->Size(); + const VAddr first_addr = first->CpuAddr(); + const VAddr second_addr = second->CpuAddr(); const VAddr new_addr = std::min(first_addr, second_addr); const std::size_t new_size = size_1 + size_2; - OwnerBuffer new_buffer = CreateBlock(new_addr, new_size); - CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); - CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); - first->SetEpoch(epoch); - second->SetEpoch(epoch); - pending_destruction.push_back(first); - pending_destruction.push_back(second); + + std::shared_ptr new_buffer = CreateBlock(new_addr, new_size); + CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1); + CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2); + QueueDestruction(std::move(first)); + QueueDestruction(std::move(second)); + const VAddr cpu_addr_end = new_addr + new_size - 1; - u64 page_start = new_addr >> block_page_bits; - const u64 page_end = cpu_addr_end >> block_page_bits; - while (page_start <= page_end) { - blocks[page_start] = new_buffer; - ++page_start; + const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; + for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { + blocks.insert_or_assign(page_start, new_buffer); } return new_buffer; } - OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { - OwnerBuffer found; + Buffer* GetBlock(VAddr cpu_addr, std::size_t size) { + std::shared_ptr found; + const VAddr cpu_addr_end = cpu_addr + size - 1; - u64 page_start = cpu_addr >> block_page_bits; - const u64 page_end = cpu_addr_end >> block_page_bits; - while (page_start <= page_end) { + const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; + for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { auto it = blocks.find(page_start); if (it == blocks.end()) { if (found) { found = EnlargeBlock(found); - } else { - const VAddr start_addr = (page_start << block_page_bits); - found = CreateBlock(start_addr, block_page_size); - blocks[page_start] = found; - } - } else { - if (found) { - if (found == it->second) { - ++page_start; - continue; - } - found = MergeBlocks(found, it->second); - } else { - found = it->second; + continue; } + const VAddr start_addr = page_start << BLOCK_PAGE_BITS; + found = CreateBlock(start_addr, BLOCK_PAGE_SIZE); + blocks.insert_or_assign(page_start, found); + continue; + } + if (!found) { + found = it->second; + continue; + } + if (found != it->second) { + found = MergeBlocks(std::move(found), it->second); } - ++page_start; } - return found; + return found.get(); } - void MarkRegionAsWritten(const VAddr start, const VAddr end) { - u64 page_start = start >> write_page_bit; - const u64 page_end = end >> write_page_bit; - while (page_start <= page_end) { + void MarkRegionAsWritten(VAddr start, VAddr end) { + const u64 page_end = end >> WRITE_PAGE_BIT; + for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { auto it = written_pages.find(page_start); if (it != written_pages.end()) { it->second = it->second + 1; } else { - written_pages[page_start] = 1; + written_pages.insert_or_assign(page_start, 1); } - ++page_start; } } - void UnmarkRegionAsWritten(const VAddr start, const VAddr end) { - u64 page_start = start >> write_page_bit; - const u64 page_end = end >> write_page_bit; - while (page_start <= page_end) { + void UnmarkRegionAsWritten(VAddr start, VAddr end) { + const u64 page_end = end >> WRITE_PAGE_BIT; + for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { auto it = written_pages.find(page_start); if (it != written_pages.end()) { if (it->second > 1) { @@ -547,22 +547,24 @@ private: written_pages.erase(it); } } - ++page_start; } } - bool IsRegionWritten(const VAddr start, const VAddr end) const { - u64 page_start = start >> write_page_bit; - const u64 page_end = end >> write_page_bit; - while (page_start <= page_end) { + bool IsRegionWritten(VAddr start, VAddr end) const { + const u64 page_end = end >> WRITE_PAGE_BIT; + for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { if (written_pages.count(page_start) > 0) { return true; } - ++page_start; } return false; } + void QueueDestruction(std::shared_ptr buffer) { + buffer->SetEpoch(epoch); + pending_destruction.push(std::move(buffer)); + } + void MarkForAsyncFlush(MapInterval* map) { if (!uncommitted_flushes) { uncommitted_flushes = std::make_shared>(); @@ -574,7 +576,7 @@ private: Core::System& system; std::unique_ptr stream_buffer; - BufferType stream_buffer_handle{}; + BufferType stream_buffer_handle; bool invalidated = false; @@ -586,18 +588,15 @@ private: boost::intrusive::set> mapped_addresses; - static constexpr u64 write_page_bit = 11; std::unordered_map written_pages; + std::unordered_map> blocks; - static constexpr u64 block_page_bits = 21; - static constexpr u64 block_page_size = 1ULL << block_page_bits; - std::unordered_map blocks; - - std::list pending_destruction; + std::queue> pending_destruction; u64 epoch = 0; u64 modified_ticks = 0; std::vector staging_buffer; + std::list marked_for_unregister; std::shared_ptr> uncommitted_flushes; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 9964ea894..ad0577a4f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -22,13 +22,12 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) - : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast(size), nullptr, GL_DYNAMIC_DRAW); } -CachedBufferBlock::~CachedBufferBlock() = default; +Buffer::~Buffer() = default; OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, const Device& device, std::size_t stream_size) @@ -48,12 +47,8 @@ OGLBufferCache::~OGLBufferCache() { glDeleteBuffers(static_cast(std::size(cbufs)), std::data(cbufs)); } -Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared(cpu_addr, size); -} - -GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { - return buffer->GetHandle(); +std::shared_ptr OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { + return std::make_shared(cpu_addr, size); } GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { @@ -62,7 +57,7 @@ GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) { - glNamedBufferSubData(buffer->GetHandle(), static_cast(offset), + glNamedBufferSubData(buffer.Handle(), static_cast(offset), static_cast(size), data); } @@ -70,20 +65,20 @@ void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, u8* data) { MICROPROFILE_SCOPE(OpenGL_Buffer_Download); glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - glGetNamedBufferSubData(buffer->GetHandle(), static_cast(offset), + glGetNamedBufferSubData(buffer.Handle(), static_cast(offset), static_cast(size), data); } void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) { - glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast(src_offset), + glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast(src_offset), static_cast(dst_offset), static_cast(size)); } OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, std::size_t size) { DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); - const GLuint& cbuf = cbufs[cbuf_cursor++]; + const GLuint cbuf = cbufs[cbuf_cursor++]; glNamedBufferSubData(cbuf, 0, static_cast(size), raw_pointer); return {cbuf, 0}; } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a9e86cfc7..7168312b1 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -24,17 +24,12 @@ class Device; class OGLStreamBuffer; class RasterizerOpenGL; -class CachedBufferBlock; - -using Buffer = std::shared_ptr; -using GenericBufferCache = VideoCommon::BufferCache; - -class CachedBufferBlock : public VideoCommon::BufferBlock { +class Buffer : public VideoCommon::BufferBlock { public: - explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); - ~CachedBufferBlock(); + explicit Buffer(VAddr cpu_addr, const std::size_t size); + ~Buffer(); - GLuint GetHandle() const { + GLuint Handle() const { return gl_buffer.handle; } @@ -42,6 +37,7 @@ private: OGLBuffer gl_buffer; }; +using GenericBufferCache = VideoCommon::BufferCache; class OGLBufferCache final : public GenericBufferCache { public: explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, @@ -55,9 +51,7 @@ public: } protected: - Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; - - GLuint ToHandle(const Buffer& buffer) override; + std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) override; void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) override; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 6ec328c53..932a2f69e 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -49,14 +49,6 @@ OGLStreamBuffer::~OGLStreamBuffer() { gl_buffer.Release(); } -GLuint OGLStreamBuffer::GetHandle() const { - return gl_buffer.handle; -} - -GLsizeiptr OGLStreamBuffer::GetSize() const { - return buffer_size; -} - std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { ASSERT(size <= buffer_size); ASSERT(alignment <= buffer_size); diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index f8383cbd4..866da3594 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -17,9 +17,6 @@ public: bool use_persistent = true); ~OGLStreamBuffer(); - GLuint GetHandle() const; - GLsizeiptr GetSize() const; - /* * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes * and the optional alignment requirement. @@ -32,6 +29,14 @@ public: void Unmap(GLsizeiptr size); + GLuint Handle() const { + return gl_buffer.handle; + } + + GLsizeiptr Size() const { + return buffer_size; + } + private: OGLBuffer gl_buffer; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 5f33d9e40..1fde38328 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -37,8 +37,8 @@ std::unique_ptr CreateStreamBuffer(const VKDevice& device, VKSch } // Anonymous namespace -CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, - VAddr cpu_addr, std::size_t size) +Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, + std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { VkBufferCreateInfo ci; ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; @@ -54,7 +54,7 @@ CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& me buffer.commit = memory_manager.Commit(buffer.handle, false); } -CachedBufferBlock::~CachedBufferBlock() = default; +Buffer::~Buffer() = default; VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, const VKDevice& device, VKMemoryManager& memory_manager, @@ -67,12 +67,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S VKBufferCache::~VKBufferCache() = default; -Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared(device, memory_manager, cpu_addr, size); -} - -VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) { - return buffer->GetHandle(); +std::shared_ptr VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { + return std::make_shared(device, memory_manager, cpu_addr, size); } VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { @@ -91,7 +87,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st std::memcpy(staging.commit->Map(size), data, size); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, + scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); @@ -114,7 +110,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, u8* data) { const auto& staging = staging_pool.GetUnusedBuffer(size, true); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, + scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, size](vk::CommandBuffer cmdbuf) { VkBufferMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -141,8 +137,8 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) { scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset, - dst_offset, size](vk::CommandBuffer cmdbuf) { + scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset, + size](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); std::array barriers; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index a54583e7d..cb9734c10 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -24,13 +24,13 @@ class VKDevice; class VKMemoryManager; class VKScheduler; -class CachedBufferBlock final : public VideoCommon::BufferBlock { +class Buffer final : public VideoCommon::BufferBlock { public: - explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, - VAddr cpu_addr, std::size_t size); - ~CachedBufferBlock(); + explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, + std::size_t size); + ~Buffer(); - VkBuffer GetHandle() const { + VkBuffer Handle() const { return *buffer.handle; } @@ -38,8 +38,6 @@ private: VKBuffer buffer; }; -using Buffer = std::shared_ptr; - class VKBufferCache final : public VideoCommon::BufferCache { public: explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, @@ -50,9 +48,7 @@ public: VkBuffer GetEmptyBuffer(std::size_t size) override; protected: - VkBuffer ToHandle(const Buffer& buffer) override; - - Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; + std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) override; void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) override; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index dfddf7ad6..c765c60a0 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -35,7 +35,7 @@ public: /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Unmap(u64 size); - VkBuffer GetHandle() const { + VkBuffer Handle() const { return *buffer; } -- cgit v1.2.3 From 4514b80b3eedff01e994f225ea3d2da292c23e01 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 21:55:00 -0400 Subject: buffer_cache: Eliminate local variable shadowing We can just make use of the instance in the scope above this one. --- src/video_core/buffer_cache/buffer_cache.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 308d8b55f..bae1d527c 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -47,7 +47,7 @@ public: bool is_written = false, bool use_fast_cbuf = false) { std::lock_guard lock{mutex}; - const auto& memory_manager = system.GPU().MemoryManager(); + auto& memory_manager = system.GPU().MemoryManager(); const std::optional cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); if (!cpu_addr_opt) { return {GetEmptyBuffer(size), 0}; @@ -59,7 +59,6 @@ public: constexpr std::size_t max_stream_size = 0x800; if (use_fast_cbuf || size < max_stream_size) { if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { - auto& memory_manager = system.GPU().MemoryManager(); const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size); if (use_fast_cbuf) { u8* dest; -- cgit v1.2.3 From 32485917ba7cb7b2f0cad766c0897365294650a7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 11 May 2020 16:35:04 -0300 Subject: gl_buffer_cache: Mark buffers as resident Make stream buffer and cached buffers as resident and query their address. This allows us to use GPU addresses for several proprietary Nvidia extensions. --- src/video_core/buffer_cache/buffer_cache.h | 21 ++++++----- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 24 ++++++++---- src/video_core/renderer_opengl/gl_buffer_cache.h | 20 +++++++--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 44 +++++++++++----------- .../renderer_opengl/gl_stream_buffer.cpp | 11 +++++- src/video_core/renderer_opengl/gl_stream_buffer.h | 11 +++++- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 4 +- src/video_core/renderer_vulkan/vk_buffer_cache.h | 6 ++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 31 ++++++++------- src/video_core/renderer_vulkan/vk_stream_buffer.h | 6 ++- 10 files changed, 111 insertions(+), 67 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index bae1d527c..6ea59253a 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -41,7 +41,11 @@ class BufferCache { static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; public: - using BufferInfo = std::pair; + struct BufferInfo { + BufferType handle; + u64 offset; + u64 address; + }; BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, bool is_written = false, bool use_fast_cbuf = false) { @@ -50,7 +54,7 @@ public: auto& memory_manager = system.GPU().MemoryManager(); const std::optional cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); if (!cpu_addr_opt) { - return {GetEmptyBuffer(size), 0}; + return GetEmptyBuffer(size); } const VAddr cpu_addr = *cpu_addr_opt; @@ -88,7 +92,7 @@ public: Buffer* const block = GetBlock(cpu_addr, size); MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); if (!map) { - return {GetEmptyBuffer(size), 0}; + return GetEmptyBuffer(size); } if (is_written) { map->MarkAsModified(true, GetModifiedTicks()); @@ -101,7 +105,7 @@ public: } } - return {block->Handle(), static_cast(block->Offset(cpu_addr))}; + return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()}; } /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. @@ -254,13 +258,12 @@ public: committed_flushes.pop_front(); } - virtual BufferType GetEmptyBuffer(std::size_t size) = 0; + virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; protected: explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - std::unique_ptr stream_buffer_) - : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)}, - stream_buffer_handle{stream_buffer->Handle()} {} + std::unique_ptr stream_buffer) + : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {} ~BufferCache() = default; @@ -449,7 +452,7 @@ private: buffer_ptr += size; buffer_offset += size; - return {stream_buffer_handle, uploaded_offset}; + return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()}; } void AlignBuffer(std::size_t alignment) { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index ad0577a4f..e09b47f57 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -22,21 +22,28 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast(size), nullptr, GL_DYNAMIC_DRAW); + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); + glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); + } } Buffer::~Buffer() = default; OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, - const Device& device, std::size_t stream_size) - : GenericBufferCache{rasterizer, system, std::make_unique(stream_size, true)} { + const Device& device_, std::size_t stream_size) + : GenericBufferCache{rasterizer, system, + std::make_unique(device_, stream_size, true)}, + device{device_} { if (!device.HasFastBufferSubData()) { return; } - static constexpr auto size = static_cast(Maxwell::MaxConstBufferSize); + static constexpr GLsizeiptr size = static_cast(Maxwell::MaxConstBufferSize); glCreateBuffers(static_cast(std::size(cbufs)), std::data(cbufs)); for (const GLuint cbuf : cbufs) { glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); @@ -48,11 +55,11 @@ OGLBufferCache::~OGLBufferCache() { } std::shared_ptr OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared(cpu_addr, size); + return std::make_shared(device, cpu_addr, size); } -GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { - return 0; +OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { + return {0, 0, 0}; } void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, @@ -79,8 +86,9 @@ OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_poi std::size_t size) { DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); const GLuint cbuf = cbufs[cbuf_cursor++]; + glNamedBufferSubData(cbuf, 0, static_cast(size), raw_pointer); - return {cbuf, 0}; + return {cbuf, 0, 0}; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a49aaf9c4..6462cfae5 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -25,15 +25,20 @@ class RasterizerOpenGL; class Buffer : public VideoCommon::BufferBlock { public: - explicit Buffer(VAddr cpu_addr, const std::size_t size); + explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); ~Buffer(); - GLuint Handle() const { + GLuint Handle() const noexcept { return gl_buffer.handle; } + u64 Address() const noexcept { + return gpu_address; + } + private: OGLBuffer gl_buffer; + u64 gpu_address = 0; }; using GenericBufferCache = VideoCommon::BufferCache; @@ -43,7 +48,7 @@ public: const Device& device, std::size_t stream_size); ~OGLBufferCache(); - GLuint GetEmptyBuffer(std::size_t) override; + BufferInfo GetEmptyBuffer(std::size_t) override; void Acquire() noexcept { cbuf_cursor = 0; @@ -64,10 +69,13 @@ protected: BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; private: + static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * + Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; + + const Device& device; + std::size_t cbuf_cursor = 0; - std::array - cbufs; + std::array cbufs{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d6c11320..7cb378a71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -253,8 +253,8 @@ void RasterizerOpenGL::SetupVertexBuffer() { glBindVertexBuffer(static_cast(index), 0, 0, vertex_array.stride); continue; } - const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); - glBindVertexBuffer(static_cast(index), vertex_buffer, vertex_buffer_offset, + const auto info = buffer_cache.UploadMemory(start, size); + glBindVertexBuffer(static_cast(index), info.handle, info.offset, vertex_array.stride); } } @@ -285,9 +285,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { MICROPROFILE_SCOPE(OpenGL_Index); const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); - const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); - return offset; + const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); + return info.offset; } void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { @@ -643,9 +643,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { if (!device.UseAssemblyShaders()) { MaxwellUniformData ubo; ubo.SetFromRegs(gpu); - const auto [buffer, offset] = + const auto info = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, + glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, static_cast(sizeof(ubo))); } @@ -956,8 +956,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, if (device.UseAssemblyShaders()) { glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, - buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float)); } return; } @@ -970,24 +969,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); const GPUVAddr gpu_addr = buffer.address; - auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); + auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); if (device.UseAssemblyShaders()) { UNIMPLEMENTED_IF(use_unified); - if (offset != 0) { + if (info.offset != 0) { const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; - glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); - cbuf = staging_cbuf; - offset = 0; + glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); + info.handle = staging_cbuf; + info.offset = 0; } - glBindBufferRangeNV(stage, binding, cbuf, offset, size); + glBindBufferRangeNV(stage, binding, info.handle, info.offset, size); return; } if (use_unified) { - glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); + glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, + unified_offset, size); } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size); } } @@ -1023,9 +1023,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size) { const auto alignment{device.GetShaderStorageBufferAlignment()}; - const auto [ssbo, buffer_offset] = - buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset, + const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, static_cast(size)); } @@ -1712,8 +1711,9 @@ void RasterizerOpenGL::EndTransformFeedback() { const GLuint handle = transform_feedback_buffers[index].handle; const GPUVAddr gpu_addr = binding.Address(); const std::size_t size = binding.buffer_size; - const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast(size)); + const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, + static_cast(size)); } } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index aeafcfbfe..164df4feb 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -2,12 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include #include #include "common/alignment.h" #include "common/assert.h" #include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", @@ -15,7 +16,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage) : buffer_size(size) { +OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) + : buffer_size(size) { gl_buffer.Create(); GLsizeiptr allocate_size = size; @@ -32,6 +34,11 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage) : buff glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); mapped_ptr = static_cast( glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); + + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); + } } OGLStreamBuffer::~OGLStreamBuffer() { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 826c2e361..e67a82980 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -11,9 +11,11 @@ namespace OpenGL { +class Device; + class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage); + explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); ~OGLStreamBuffer(); /* @@ -32,13 +34,18 @@ public: return gl_buffer.handle; } - GLsizeiptr Size() const { + u64 Address() const { + return gpu_address; + } + + GLsizeiptr Size() const noexcept { return buffer_size; } private: OGLBuffer gl_buffer; + GLuint64EXT gpu_address = 0; GLintptr buffer_pos = 0; GLsizeiptr buffer_size = 0; GLintptr mapped_offset = 0; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1fde38328..df258d7a4 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -71,14 +71,14 @@ std::shared_ptr VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t s return std::make_shared(device, memory_manager, cpu_addr, size); } -VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { +VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { size = std::max(size, std::size_t(4)); const auto& empty = staging_pool.GetUnusedBuffer(size, false); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { cmdbuf.FillBuffer(buffer, 0, size, 0); }); - return *empty.handle; + return {*empty.handle, 0, 0}; } void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 9ebbef835..682383ff2 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -33,6 +33,10 @@ public: return *buffer.handle; } + u64 Address() const { + return 0; + } + private: VKBuffer buffer; }; @@ -44,7 +48,7 @@ public: VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKBufferCache(); - VkBuffer GetEmptyBuffer(std::size_t size) override; + BufferInfo GetEmptyBuffer(std::size_t size) override; protected: std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) override; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 29001953c..e3714ee6d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -870,10 +870,10 @@ void RasterizerVulkan::BeginTransformFeedback() { UNIMPLEMENTED_IF(binding.buffer_offset != 0); const GPUVAddr gpu_addr = binding.Address(); - const auto size = static_cast(binding.buffer_size); - const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + const VkDeviceSize size = static_cast(binding.buffer_size); + const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { + scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); @@ -925,8 +925,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); continue; } - const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); - buffer_bindings.AddVertexBinding(buffer, offset); + const auto info = buffer_cache.UploadMemory(start, size); + buffer_bindings.AddVertexBinding(info.handle, info.offset); } } @@ -948,7 +948,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar break; } const GPUVAddr gpu_addr = regs.index_array.IndexStart(); - auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + VkBuffer buffer = info.handle; + u64 offset = info.offset; std::tie(buffer, offset) = quad_indexed_pass.Assemble( regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); @@ -962,7 +964,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar break; } const GPUVAddr gpu_addr = regs.index_array.IndexStart(); - auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + VkBuffer buffer = info.handle; + u64 offset = info.offset; auto format = regs.index_array.format; const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; @@ -1109,10 +1113,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); ASSERT(size <= MaxConstbufferSize); - const auto [buffer_handle, offset] = + const auto info = buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); - - update_descriptor_queue.AddBuffer(buffer_handle, offset, size); + update_descriptor_queue.AddBuffer(info.handle, info.offset, size); } void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { @@ -1126,14 +1129,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the // default buffer. static constexpr std::size_t dummy_size = 4; - const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); - update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); + const auto info = buffer_cache.GetEmptyBuffer(dummy_size); + update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); return; } - const auto [buffer, offset] = buffer_cache.UploadMemory( + const auto info = buffer_cache.UploadMemory( actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); - update_descriptor_queue.AddBuffer(buffer, offset, size); + update_descriptor_queue.AddBuffer(info.handle, info.offset, size); } void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index c765c60a0..689f0d276 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -35,10 +35,14 @@ public: /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Unmap(u64 size); - VkBuffer Handle() const { + VkBuffer Handle() const noexcept { return *buffer; } + u64 Address() const noexcept { + return 0; + } + private: struct Watch final { VKFenceWatch fence; -- cgit v1.2.3 From 32a2dcd4153f4e2aea7b5f88c85d8a352f647f12 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Jun 2020 20:47:48 -0300 Subject: buffer_cache: Use buffer methods instead of cache virtual methods --- src/video_core/buffer_cache/buffer_cache.h | 23 ++---- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 38 +++++---- src/video_core/renderer_opengl/gl_buffer_cache.h | 16 ++-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 89 +++++++++++----------- src/video_core/renderer_vulkan/vk_buffer_cache.h | 23 +++--- 5 files changed, 90 insertions(+), 99 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6ea59253a..cf8bdd021 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -269,15 +269,6 @@ protected: virtual std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) = 0; - virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) = 0; - - virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) = 0; - - virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) = 0; - virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { return {}; } @@ -339,11 +330,11 @@ private: const VAddr cpu_addr_end = cpu_addr + size; if (memory_manager.IsGranularRange(gpu_addr, size)) { u8* host_ptr = memory_manager.GetPointer(gpu_addr); - UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr); + block->Upload(block->Offset(cpu_addr), size, host_ptr); } else { staging_buffer.resize(size); memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); - UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data()); + block->Upload(block->Offset(cpu_addr), size, staging_buffer.data()); } return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); } @@ -402,7 +393,7 @@ private: } staging_buffer.resize(size); system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); - UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data()); + block->Upload(block->Offset(interval.lower()), size, staging_buffer.data()); } } @@ -439,7 +430,7 @@ private: const std::size_t size = map->end - map->start; staging_buffer.resize(size); - DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data()); + block->Download(block->Offset(map->start), size, staging_buffer.data()); system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); map->MarkAsModified(false, 0); } @@ -467,7 +458,7 @@ private: const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; const VAddr cpu_addr = buffer->CpuAddr(); std::shared_ptr new_buffer = CreateBlock(cpu_addr, new_size); - CopyBlock(*buffer, *new_buffer, 0, 0, old_size); + new_buffer->CopyFrom(*buffer, 0, 0, old_size); QueueDestruction(std::move(buffer)); const VAddr cpu_addr_end = cpu_addr + new_size - 1; @@ -489,8 +480,8 @@ private: const std::size_t new_size = size_1 + size_2; std::shared_ptr new_buffer = CreateBlock(new_addr, new_size); - CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1); - CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2); + new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1); + new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2); QueueDestruction(std::move(first)); QueueDestruction(std::move(second)); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index e09b47f57..d9f7b4cc6 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -34,6 +34,24 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) Buffer::~Buffer() = default; +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const { + glNamedBufferSubData(Handle(), static_cast(offset), static_cast(size), + data); +} + +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const { + MICROPROFILE_SCOPE(OpenGL_Buffer_Download); + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + glGetNamedBufferSubData(Handle(), static_cast(offset), static_cast(size), + data); +} + +void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) const { + glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast(src_offset), + static_cast(dst_offset), static_cast(size)); +} + OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, const Device& device_, std::size_t stream_size) : GenericBufferCache{rasterizer, system, @@ -62,26 +80,6 @@ OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { return {0, 0, 0}; } -void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) { - glNamedBufferSubData(buffer.Handle(), static_cast(offset), - static_cast(size), data); -} - -void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) { - MICROPROFILE_SCOPE(OpenGL_Buffer_Download); - glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - glGetNamedBufferSubData(buffer.Handle(), static_cast(offset), - static_cast(size), data); -} - -void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) { - glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast(src_offset), - static_cast(dst_offset), static_cast(size)); -} - OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, std::size_t size) { DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 6462cfae5..59d95adbc 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -28,6 +28,13 @@ public: explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); ~Buffer(); + void Upload(std::size_t offset, std::size_t size, const u8* data) const; + + void Download(std::size_t offset, std::size_t size, u8* data) const; + + void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) const; + GLuint Handle() const noexcept { return gl_buffer.handle; } @@ -57,15 +64,6 @@ public: protected: std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) override; - void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) override; - - void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) override; - - void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) override; - BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; private: diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index df258d7a4..f10f96cd8 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -37,9 +37,9 @@ std::unique_ptr CreateStreamBuffer(const VKDevice& device, VKSch } // Anonymous namespace -Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, - std::size_t size) - : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, + VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { VkBufferCreateInfo ci; ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; ci.pNext = nullptr; @@ -56,40 +56,15 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cp Buffer::~Buffer() = default; -VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool) - : VideoCommon::BufferCache{rasterizer, system, - CreateStreamBuffer(device, - scheduler)}, - device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ - staging_pool} {} - -VKBufferCache::~VKBufferCache() = default; - -std::shared_ptr VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared(device, memory_manager, cpu_addr, size); -} - -VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { - size = std::max(size, std::size_t(4)); - const auto& empty = staging_pool.GetUnusedBuffer(size, false); - scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { - cmdbuf.FillBuffer(buffer, 0, size, 0); - }); - return {*empty.handle, 0, 0}; -} - -void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) { +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const { const auto& staging = staging_pool.GetUnusedBuffer(size, true); std::memcpy(staging.commit->Map(size), data, size); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, - size](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); + + const VkBuffer handle = Handle(); + scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); VkBufferMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -98,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; + barrier.buffer = handle; barrier.offset = offset; barrier.size = size; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, @@ -106,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st }); } -void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) { +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const { const auto& staging = staging_pool.GetUnusedBuffer(size, true); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, - size](vk::CommandBuffer cmdbuf) { + + const VkBuffer handle = Handle(); + scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { VkBufferMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.pNext = nullptr; @@ -119,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; + barrier.buffer = handle; barrier.offset = offset; barrier.size = size; @@ -127,17 +102,19 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); - cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); + cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size}); }); scheduler.Finish(); std::memcpy(data, staging.commit->Map(size), size); } -void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) { +void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) const { scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset, + + const VkBuffer dst_buffer = Handle(); + scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); @@ -165,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t }); } +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool) + : VideoCommon::BufferCache{rasterizer, system, + CreateStreamBuffer(device, + scheduler)}, + device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ + staging_pool} {} + +VKBufferCache::~VKBufferCache() = default; + +std::shared_ptr VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { + return std::make_shared(device, memory_manager, scheduler, staging_pool, cpu_addr, + size); +} + +VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { + size = std::max(size, std::size_t(4)); + const auto& empty = staging_pool.GetUnusedBuffer(size, false); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { + cmdbuf.FillBuffer(buffer, 0, size, 0); + }); + return {*empty.handle, 0, 0}; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 682383ff2..3630aca77 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -25,10 +25,17 @@ class VKScheduler; class Buffer final : public VideoCommon::BufferBlock { public: - explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, - std::size_t size); + explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, + VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size); ~Buffer(); + void Upload(std::size_t offset, std::size_t size, const u8* data) const; + + void Download(std::size_t offset, std::size_t size, u8* data) const; + + void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) const; + VkBuffer Handle() const { return *buffer.handle; } @@ -38,6 +45,9 @@ public: } private: + VKScheduler& scheduler; + VKStagingBufferPool& staging_pool; + VKBuffer buffer; }; @@ -53,15 +63,6 @@ public: protected: std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) override; - void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) override; - - void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) override; - - void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) override; - private: const VKDevice& device; VKMemoryManager& memory_manager; -- cgit v1.2.3