From b780d5b5c580a65a670de73140b743072efc0fd2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 13 Jul 2021 03:33:08 +0200 Subject: DMAEngine: Accelerate BufferClear --- src/video_core/buffer_cache/buffer_cache.h | 65 ++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 4 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2871682f6..5f5a59bba 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -164,11 +164,16 @@ public: /// Pop asynchronous downloads void PopAsyncFlushes(); - [[nodiscard]] bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); + bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); + + bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + /// Return true when a region is registered on the cache + [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); + /// Return true when a CPU region is modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); @@ -469,8 +474,8 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am if (!cpu_src_address || !cpu_dest_address) { return false; } - const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount); - const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount); + const bool source_dirty = IsRegionRegistered(*cpu_src_address, amount); + const bool dest_dirty = IsRegionRegistered(*cpu_dest_address, amount); if (!source_dirty && !dest_dirty) { return false; } @@ -515,7 +520,7 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am } runtime.CopyBuffer(dest_buffer, src_buffer, copies); - if (source_dirty) { + if (IsRegionGpuModified(*cpu_src_address, amount)) { dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); } std::vector tmp_buffer(amount); @@ -524,6 +529,37 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am return true; } +template +bool BufferCache

::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { + const std::optional cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address); + if (!cpu_dst_address) { + return false; + } + const bool dest_dirty = IsRegionRegistered(*cpu_dst_address, amount); + if (!dest_dirty) { + return false; + } + + const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + amount * sizeof(u32)}; + uncommitted_ranges.subtract(subtract_interval); + for (auto& interval_set : committed_ranges) { + interval_set.subtract(subtract_interval); + } + common_ranges.subtract(subtract_interval); + + const size_t size = amount * sizeof(u32); + BufferId buffer; + do { + has_deleted_buffers = false; + buffer = FindBuffer(*cpu_dst_address, static_cast(size)); + } while (has_deleted_buffers); + + auto& dest_buffer = slot_buffers[buffer]; + const u32 offset = static_cast(*cpu_dst_address - dest_buffer.CpuAddr()); + runtime.ClearBuffer(dest_buffer, offset, size, value); + return true; +} + template void BufferCache

::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) { @@ -781,6 +817,27 @@ bool BufferCache

::IsRegionGpuModified(VAddr addr, size_t size) { return false; } +template +bool BufferCache

::IsRegionRegistered(VAddr addr, size_t size) { + const VAddr end_addr = addr + size; + const u64 page_end = Common::DivCeil(end_addr, PAGE_SIZE); + for (u64 page = addr >> PAGE_BITS; page < page_end;) { + const BufferId buffer_id = page_table[page]; + if (!buffer_id) { + ++page; + continue; + } + Buffer& buffer = slot_buffers[buffer_id]; + const VAddr buf_start_addr = buffer.CpuAddr(); + const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); + if (buf_start_addr < end_addr && addr < buf_end_addr) { + return true; + } + page = Common::DivCeil(end_addr, PAGE_SIZE); + } + return false; +} + template bool BufferCache

::IsRegionCpuModified(VAddr addr, size_t size) { const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); -- cgit v1.2.3 From 8039be8b195e26b4b4dd4dc6b58e65dfe8eda464 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 13 Jul 2021 16:16:14 +0200 Subject: BufferCache: fix clearing on forced download. --- src/video_core/buffer_cache/buffer_cache.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 5f5a59bba..4def8f076 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -329,6 +329,8 @@ private: [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; + void ClearDownload(IntervalType subtract_interval); + VideoCore::RasterizerInterface& rasterizer; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -467,6 +469,14 @@ void BufferCache

::DownloadMemory(VAddr cpu_addr, u64 size) { }); } +template +void BufferCache

::ClearDownload(IntervalType subtract_interval) { + uncommitted_ranges.subtract(subtract_interval); + for (auto& interval_set : committed_ranges) { + interval_set.subtract(subtract_interval); + } +} + template bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { const std::optional cpu_src_address = gpu_memory.GpuToCpuAddress(src_address); @@ -481,10 +491,7 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am } const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; - uncommitted_ranges.subtract(subtract_interval); - for (auto& interval_set : committed_ranges) { - interval_set.subtract(subtract_interval); - } + ClearDownload(subtract_interval); BufferId buffer_a; BufferId buffer_b; @@ -496,7 +503,6 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am auto& src_buffer = slot_buffers[buffer_a]; auto& dest_buffer = slot_buffers[buffer_b]; SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast(amount)); - SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast(amount)); std::array copies{BufferCopy{ .src_offset = src_buffer.Offset(*cpu_src_address), .dst_offset = dest_buffer.Offset(*cpu_dest_address), @@ -515,12 +521,17 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am ForEachWrittenRange(*cpu_src_address, amount, mirror); // This subtraction in this order is important for overlapping copies. common_ranges.subtract(subtract_interval); + bool atleast_1_download = tmp_intervals.size() != 0; for (const IntervalType add_interval : tmp_intervals) { common_ranges.add(add_interval); } + if (dest_buffer.HasCachedWrites()) { + dest_buffer.FlushCachedWrites(); + } runtime.CopyBuffer(dest_buffer, src_buffer, copies); - if (IsRegionGpuModified(*cpu_src_address, amount)) { + dest_buffer.UnmarkRegionAsCpuModified(*cpu_dest_address, amount); + if (atleast_1_download) { dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); } std::vector tmp_buffer(amount); @@ -541,10 +552,7 @@ bool BufferCache

::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { } const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + amount * sizeof(u32)}; - uncommitted_ranges.subtract(subtract_interval); - for (auto& interval_set : committed_ranges) { - interval_set.subtract(subtract_interval); - } + ClearDownload(subtract_interval); common_ranges.subtract(subtract_interval); const size_t size = amount * sizeof(u32); @@ -557,6 +565,7 @@ bool BufferCache

::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { auto& dest_buffer = slot_buffers[buffer]; const u32 offset = static_cast(*cpu_dst_address - dest_buffer.CpuAddr()); runtime.ClearBuffer(dest_buffer, offset, size, value); + dest_buffer.UnmarkRegionAsCpuModified(*cpu_dst_address, size); return true; } @@ -1482,6 +1491,7 @@ void BufferCache

::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si const VAddr end_address = start_address + range_size; ForEachWrittenRange(start_address, range_size, add_download); const IntervalType subtract_interval{start_address, end_address}; + ClearDownload(subtract_interval); common_ranges.subtract(subtract_interval); }); if (total_size_bytes == 0) { -- cgit v1.2.3 From a0eb3f8a3ee511e29ee362687d5f7e2df2e281f5 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 14 Jul 2021 18:25:33 +0200 Subject: Buffer Cache: Fixes to DMA Copy. --- src/video_core/buffer_cache/buffer_cache.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 4def8f076..9399bcfea 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -172,7 +172,7 @@ public: [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); /// Return true when a region is registered on the cache - [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); + [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size) const; /// Return true when a CPU region is modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); @@ -503,6 +503,11 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am auto& src_buffer = slot_buffers[buffer_a]; auto& dest_buffer = slot_buffers[buffer_b]; SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast(amount)); + const VAddr aligned_dst = Common::AlignUp(*cpu_dest_address, 64); + const u64 diff = aligned_dst - *cpu_dest_address; + const u64 new_amount = diff > amount ? 0 : amount - diff; + dest_buffer.UnmarkRegionAsCpuModified(aligned_dst, Common::AlignDown(new_amount, 64)); + SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast(amount)); std::array copies{BufferCopy{ .src_offset = src_buffer.Offset(*cpu_src_address), .dst_offset = dest_buffer.Offset(*cpu_dest_address), @@ -526,11 +531,7 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am common_ranges.add(add_interval); } - if (dest_buffer.HasCachedWrites()) { - dest_buffer.FlushCachedWrites(); - } runtime.CopyBuffer(dest_buffer, src_buffer, copies); - dest_buffer.UnmarkRegionAsCpuModified(*cpu_dest_address, amount); if (atleast_1_download) { dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); } @@ -827,7 +828,7 @@ bool BufferCache

::IsRegionGpuModified(VAddr addr, size_t size) { } template -bool BufferCache

::IsRegionRegistered(VAddr addr, size_t size) { +bool BufferCache

::IsRegionRegistered(VAddr addr, size_t size) const { const VAddr end_addr = addr + size; const u64 page_end = Common::DivCeil(end_addr, PAGE_SIZE); for (u64 page = addr >> PAGE_BITS; page < page_end;) { -- cgit v1.2.3 From 1ae4b684fff380035b468086586159a231237ed7 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 14 Jul 2021 19:04:45 +0200 Subject: Buffer cache: Fixes, Clang and Feedback. --- src/video_core/buffer_cache/buffer_cache.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9399bcfea..7373cb62d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -172,7 +172,7 @@ public: [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); /// Return true when a region is registered on the cache - [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size) const; + [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); /// Return true when a CPU region is modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); @@ -503,10 +503,6 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am auto& src_buffer = slot_buffers[buffer_a]; auto& dest_buffer = slot_buffers[buffer_b]; SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast(amount)); - const VAddr aligned_dst = Common::AlignUp(*cpu_dest_address, 64); - const u64 diff = aligned_dst - *cpu_dest_address; - const u64 new_amount = diff > amount ? 0 : amount - diff; - dest_buffer.UnmarkRegionAsCpuModified(aligned_dst, Common::AlignDown(new_amount, 64)); SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast(amount)); std::array copies{BufferCopy{ .src_offset = src_buffer.Offset(*cpu_src_address), @@ -552,21 +548,19 @@ bool BufferCache

::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { return false; } - const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + amount * sizeof(u32)}; + const size_t size = amount * sizeof(u32); + const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + size}; ClearDownload(subtract_interval); common_ranges.subtract(subtract_interval); - const size_t size = amount * sizeof(u32); BufferId buffer; do { has_deleted_buffers = false; buffer = FindBuffer(*cpu_dst_address, static_cast(size)); } while (has_deleted_buffers); - auto& dest_buffer = slot_buffers[buffer]; const u32 offset = static_cast(*cpu_dst_address - dest_buffer.CpuAddr()); runtime.ClearBuffer(dest_buffer, offset, size, value); - dest_buffer.UnmarkRegionAsCpuModified(*cpu_dst_address, size); return true; } @@ -828,7 +822,7 @@ bool BufferCache

::IsRegionGpuModified(VAddr addr, size_t size) { } template -bool BufferCache

::IsRegionRegistered(VAddr addr, size_t size) const { +bool BufferCache

::IsRegionRegistered(VAddr addr, size_t size) { const VAddr end_addr = addr + size; const u64 page_end = Common::DivCeil(end_addr, PAGE_SIZE); for (u64 page = addr >> PAGE_BITS; page < page_end;) { -- cgit v1.2.3