From 6e2ca7fbee8b7de964926da392ab0550b2a76475 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 18:50:48 -0300 Subject: buffer_cache: Simplify clear logic Use existing helper functions and avoid looping when only one buffer has to be active. --- src/video_core/buffer_cache/buffer_cache.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'src/video_core/buffer_cache/buffer_cache.h') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7373cb62d..5a0b6f0c0 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -553,13 +553,9 @@ bool BufferCache

::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { ClearDownload(subtract_interval); common_ranges.subtract(subtract_interval); - BufferId buffer; - do { - has_deleted_buffers = false; - buffer = FindBuffer(*cpu_dst_address, static_cast(size)); - } while (has_deleted_buffers); + const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast(size)); auto& dest_buffer = slot_buffers[buffer]; - const u32 offset = static_cast(*cpu_dst_address - dest_buffer.CpuAddr()); + const u32 offset = dest_buffer.Offset(*cpu_dst_address); runtime.ClearBuffer(dest_buffer, offset, size, value); return true; } -- cgit v1.2.3 From e9a91bc5cc2c39b476ba8946f66930f5ab5608b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 20:14:55 -0300 Subject: shader: Interact texture buffers with buffer cache --- src/video_core/buffer_cache/buffer_cache.h | 138 +++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) (limited to 'src/video_core/buffer_cache/buffer_cache.h') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7373cb62d..6701aab82 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -31,6 +31,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" #include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" @@ -42,11 +43,14 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); using BufferId = SlotId; +using VideoCore::Surface::PixelFormat; + constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; constexpr u32 NUM_STORAGE_BUFFERS = 16; +constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; using namespace Common::Literals; @@ -66,6 +70,7 @@ class BufferCache { P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -96,6 +101,10 @@ class BufferCache { BufferId buffer_id; }; + struct TextureBufferBinding : Binding { + PixelFormat format; + }; + static constexpr Binding NULL_BINDING{ .cpu_addr = 0, .size = 0, @@ -142,11 +151,21 @@ public: void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindGraphicsTextureBuffers(size_t stage); + + void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format); + void UnbindComputeStorageBuffers(); void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindComputeTextureBuffers(); + + void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format); + void FlushCachedWrites(); /// Return true when there are uncommitted buffers to be downloaded @@ -254,12 +273,16 @@ private: void BindHostGraphicsStorageBuffers(size_t stage); + void BindHostGraphicsTextureBuffers(size_t stage); + void BindHostTransformFeedbackBuffers(); void BindHostComputeUniformBuffers(); void BindHostComputeStorageBuffers(); + void BindHostComputeTextureBuffers(); + void DoUpdateGraphicsBuffers(bool is_indexed); void DoUpdateComputeBuffers(); @@ -274,6 +297,8 @@ private: void UpdateStorageBuffers(size_t stage); + void UpdateTextureBuffers(size_t stage); + void UpdateTransformFeedbackBuffers(); void UpdateTransformFeedbackBuffer(u32 index); @@ -282,6 +307,8 @@ private: void UpdateComputeStorageBuffers(); + void UpdateComputeTextureBuffers(); + void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); @@ -323,6 +350,9 @@ private: [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; + [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, + PixelFormat format); + [[nodiscard]] std::span ImmediateBufferWithData(VAddr cpu_addr, size_t size); [[nodiscard]] std::span ImmediateBuffer(size_t wanted_capacity); @@ -347,10 +377,12 @@ private: std::array vertex_buffers; std::array, NUM_STAGES> uniform_buffers; std::array, NUM_STAGES> storage_buffers; + std::array, NUM_STAGES> texture_buffers; std::array transform_feedback_buffers; std::array compute_uniform_buffers; std::array compute_storage_buffers; + std::array compute_texture_buffers; std::array enabled_uniform_buffers{}; u32 enabled_compute_uniform_buffers = 0; @@ -360,6 +392,9 @@ private: u32 enabled_compute_storage_buffers = 0; u32 written_compute_storage_buffers = 0; + std::array enabled_texture_buffers{}; + u32 enabled_compute_texture_buffers = 0; + std::array fast_bound_uniform_buffers{}; std::array uniform_cache_hits{}; @@ -619,6 +654,7 @@ void BufferCache

::BindHostStageBuffers(size_t stage) { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostGraphicsUniformBuffers(stage); BindHostGraphicsStorageBuffers(stage); + BindHostGraphicsTextureBuffers(stage); } template @@ -626,6 +662,7 @@ void BufferCache

::BindHostComputeBuffers() { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostComputeUniformBuffers(); BindHostComputeStorageBuffers(); + BindHostComputeTextureBuffers(); } template @@ -660,6 +697,18 @@ void BufferCache

::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr); } +template +void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { + enabled_texture_buffers[stage] = 0; +} + +template +void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, + u32 size, PixelFormat format) { + enabled_texture_buffers[stage] |= 1U << tbo_index; + texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + template void BufferCache

::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; @@ -680,6 +729,18 @@ void BufferCache

::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr); } +template +void BufferCache

::UnbindComputeTextureBuffers() { + enabled_compute_texture_buffers = 0; +} + +template +void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format) { + enabled_compute_texture_buffers |= 1U << tbo_index; + compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + template void BufferCache

::FlushCachedWrites() { for (const BufferId buffer_id : cached_write_buffer_ids) { @@ -988,6 +1049,26 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { }); } +template +void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { + u32 binding_index = 0; + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + const TextureBufferBinding& binding = texture_buffers[stage][index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { + runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); + ++binding_index; + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + template void BufferCache

::BindHostTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { @@ -1050,6 +1131,26 @@ void BufferCache

::BindHostComputeStorageBuffers() { }); } +template +void BufferCache

::BindHostComputeTextureBuffers() { + u32 binding_index = 0; + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + const TextureBufferBinding& binding = compute_texture_buffers[index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { + runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); + ++binding_index; + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + template void BufferCache

::DoUpdateGraphicsBuffers(bool is_indexed) { if (is_indexed) { @@ -1060,6 +1161,7 @@ void BufferCache

::DoUpdateGraphicsBuffers(bool is_indexed) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) { UpdateUniformBuffers(stage); UpdateStorageBuffers(stage); + UpdateTextureBuffers(stage); } } @@ -1067,6 +1169,7 @@ template void BufferCache

::DoUpdateComputeBuffers() { UpdateComputeUniformBuffers(); UpdateComputeStorageBuffers(); + UpdateComputeTextureBuffers(); } template @@ -1166,6 +1269,14 @@ void BufferCache

::UpdateStorageBuffers(size_t stage) { }); } +template +void BufferCache

::UpdateTextureBuffers(size_t stage) { + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + Binding& binding = texture_buffers[stage][index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + }); +} + template void BufferCache

::UpdateTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { @@ -1227,6 +1338,14 @@ void BufferCache

::UpdateComputeStorageBuffers() { }); } +template +void BufferCache

::UpdateComputeTextureBuffers() { + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + Binding& binding = compute_texture_buffers[index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + }); +} + template void BufferCache

::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { Buffer& buffer = slot_buffers[buffer_id]; @@ -1581,6 +1700,25 @@ typename BufferCache

::Binding BufferCache

::StorageBufferBinding(GPUVAddr s return binding; } +template +typename BufferCache

::TextureBufferBinding BufferCache

::GetTextureBufferBinding( + GPUVAddr gpu_addr, u32 size, PixelFormat format) { + const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + TextureBufferBinding binding; + if (!cpu_addr || size == 0) { + binding.cpu_addr = 0; + binding.size = 0; + binding.buffer_id = NULL_BUFFER_ID; + binding.format = PixelFormat::Invalid; + } else { + binding.cpu_addr = *cpu_addr; + binding.size = size; + binding.buffer_id = BufferId{}; + binding.format = format; + } + return binding; +} + template std::span BufferCache

::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); -- cgit v1.2.3 From 416e1b7441d34512fcb0ffed014daf7ca4bb62bd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 21:36:36 -0300 Subject: spirv: Implement image buffers --- src/video_core/buffer_cache/buffer_cache.h | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'src/video_core/buffer_cache/buffer_cache.h') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6701aab82..29746f61d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -154,7 +154,7 @@ public: void UnbindGraphicsTextureBuffers(size_t stage); void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format); + PixelFormat format, bool is_written); void UnbindComputeStorageBuffers(); @@ -163,8 +163,8 @@ public: void UnbindComputeTextureBuffers(); - void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format); + void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, + bool is_written); void FlushCachedWrites(); @@ -393,7 +393,9 @@ private: u32 written_compute_storage_buffers = 0; std::array enabled_texture_buffers{}; + std::array written_texture_buffers{}; u32 enabled_compute_texture_buffers = 0; + u32 written_compute_texture_buffers = 0; std::array fast_bound_uniform_buffers{}; @@ -700,12 +702,14 @@ void BufferCache

::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, template void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { enabled_texture_buffers[stage] = 0; + written_texture_buffers[stage] = 0; } template void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, - u32 size, PixelFormat format) { + u32 size, PixelFormat format, bool is_written) { enabled_texture_buffers[stage] |= 1U << tbo_index; + written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -732,12 +736,14 @@ void BufferCache

::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, template void BufferCache

::UnbindComputeTextureBuffers() { enabled_compute_texture_buffers = 0; + written_compute_texture_buffers = 0; } template void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format) { + PixelFormat format, bool is_written) { enabled_compute_texture_buffers |= 1U << tbo_index; + written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -1274,6 +1280,10 @@ void BufferCache

::UpdateTextureBuffers(size_t stage) { ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { Binding& binding = texture_buffers[stage][index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark buffer as written if needed + if (((written_texture_buffers[stage] >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } }); } @@ -1343,6 +1353,10 @@ void BufferCache

::UpdateComputeTextureBuffers() { ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { Binding& binding = compute_texture_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark as written if needed + if (((written_compute_texture_buffers >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } }); } -- cgit v1.2.3 From d621e96d0de212cc16897eadf71e8a1b2e1eb5dc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:28:34 -0300 Subject: shader: Initial OpenGL implementation --- src/video_core/buffer_cache/buffer_cache.h | 53 +++++++++++++++++++----------- 1 file changed, 34 insertions(+), 19 deletions(-) (limited to 'src/video_core/buffer_cache/buffer_cache.h') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 29746f61d..6c92e4c30 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -70,8 +70,8 @@ class BufferCache { P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; + static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -154,7 +154,7 @@ public: void UnbindGraphicsTextureBuffers(size_t stage); void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written); + PixelFormat format, bool is_written, bool is_image); void UnbindComputeStorageBuffers(); @@ -164,7 +164,7 @@ public: void UnbindComputeTextureBuffers(); void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, - bool is_written); + bool is_written, bool is_image); void FlushCachedWrites(); @@ -197,6 +197,7 @@ public: [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); std::mutex mutex; + Runtime& runtime; private: template @@ -366,7 +367,6 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - Runtime& runtime; SlotVector slot_buffers; DelayedDestructionRing delayed_destruction_ring; @@ -394,8 +394,10 @@ private: std::array enabled_texture_buffers{}; std::array written_texture_buffers{}; + std::array image_texture_buffers{}; u32 enabled_compute_texture_buffers = 0; u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; std::array fast_bound_uniform_buffers{}; @@ -431,8 +433,8 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_) - : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, - gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); deletion_iterator = slot_buffers.end(); @@ -703,13 +705,18 @@ template void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { enabled_texture_buffers[stage] = 0; written_texture_buffers[stage] = 0; + image_texture_buffers[stage] = 0; } template void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, - u32 size, PixelFormat format, bool is_written) { + u32 size, PixelFormat format, bool is_written, + bool is_image) { enabled_texture_buffers[stage] |= 1U << tbo_index; written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; + } texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -717,6 +724,7 @@ template void BufferCache

::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; written_compute_storage_buffers = 0; + image_compute_texture_buffers = 0; } template @@ -737,13 +745,17 @@ template void BufferCache

::UnbindComputeTextureBuffers() { enabled_compute_texture_buffers = 0; written_compute_texture_buffers = 0; + image_compute_texture_buffers = 0; } template void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written) { + PixelFormat format, bool is_written, bool is_image) { enabled_compute_texture_buffers |= 1U << tbo_index; written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + } compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -1057,7 +1069,6 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { template void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { - u32 binding_index = 0; ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { const TextureBufferBinding& binding = texture_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1066,9 +1077,12 @@ void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_texture_buffers[stage] >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1139,7 +1153,6 @@ void BufferCache

::BindHostComputeStorageBuffers() { template void BufferCache

::BindHostComputeTextureBuffers() { - u32 binding_index = 0; ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { const TextureBufferBinding& binding = compute_texture_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1148,9 +1161,12 @@ void BufferCache

::BindHostComputeTextureBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_compute_texture_buffers >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1339,11 +1355,10 @@ void BufferCache

::UpdateComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer Binding& binding = compute_storage_buffers[index]; - const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); - binding.buffer_id = buffer_id; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed if (((written_compute_storage_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); } -- cgit v1.2.3 From a7e9756671be5bb99566277709e5becdea774f34 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 02:57:42 -0300 Subject: buffer_cache: Mark uniform buffers as dirty if any enable bit changes --- src/video_core/buffer_cache/buffer_cache.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/video_core/buffer_cache/buffer_cache.h') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6c92e4c30..d6b9eb99f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -142,7 +142,7 @@ public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(size_t stage, u32 enabled); + void SetEnabledUniformBuffers(const std::array& mask); void SetEnabledComputeUniformBuffers(u32 enabled); @@ -670,13 +670,13 @@ void BufferCache

::BindHostComputeBuffers() { } template -void BufferCache

::SetEnabledUniformBuffers(size_t stage, u32 enabled) { +void BufferCache

::SetEnabledUniformBuffers(const std::array& mask) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers[stage] != enabled) { - dirty_uniform_buffers[stage] = ~u32{0}; + if (enabled_uniform_buffers != mask) { + dirty_uniform_buffers.fill(~u32{0}); } } - enabled_uniform_buffers[stage] = enabled; + enabled_uniform_buffers = mask; } template -- cgit v1.2.3 From 4a2361a1e2271727f3259e8e4a60869165537253 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 02:15:07 -0300 Subject: buffer_cache: Reduce uniform buffer size from shader usage Increases performance significantly on certain titles. --- src/video_core/buffer_cache/buffer_cache.h | 42 ++++++++++++++++++------------ 1 file changed, 26 insertions(+), 16 deletions(-) (limited to 'src/video_core/buffer_cache/buffer_cache.h') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d6b9eb99f..ec64f2293 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -44,6 +44,7 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); using BufferId = SlotId; using VideoCore::Surface::PixelFormat; +using namespace Common::Literals; constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; @@ -53,7 +54,8 @@ constexpr u32 NUM_STORAGE_BUFFERS = 16; constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; -using namespace Common::Literals; +using UniformBufferSizes = std::array, NUM_STAGES>; +using ComputeUniformBufferSizes = std::array; template class BufferCache { @@ -142,9 +144,10 @@ public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(const std::array& mask); + void SetUniformBuffersState(const std::array& mask, + const UniformBufferSizes* sizes); - void SetEnabledComputeUniformBuffers(u32 enabled); + void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); void UnbindGraphicsStorageBuffers(size_t stage); @@ -384,8 +387,11 @@ private: std::array compute_storage_buffers; std::array compute_texture_buffers; - std::array enabled_uniform_buffers{}; - u32 enabled_compute_uniform_buffers = 0; + std::array enabled_uniform_buffer_masks{}; + u32 enabled_compute_uniform_buffer_mask = 0; + + const UniformBufferSizes* uniform_buffer_sizes{}; + const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; std::array enabled_storage_buffers{}; std::array written_storage_buffers{}; @@ -670,18 +676,22 @@ void BufferCache

::BindHostComputeBuffers() { } template -void BufferCache

::SetEnabledUniformBuffers(const std::array& mask) { +void BufferCache

::SetUniformBuffersState(const std::array& mask, + const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers != mask) { + if (enabled_uniform_buffer_masks != mask) { dirty_uniform_buffers.fill(~u32{0}); } } - enabled_uniform_buffers = mask; + enabled_uniform_buffer_masks = mask; + uniform_buffer_sizes = sizes; } template -void BufferCache

::SetEnabledComputeUniformBuffers(u32 enabled) { - enabled_compute_uniform_buffers = enabled; +void BufferCache

::SetComputeUniformBufferState(u32 mask, + const ComputeUniformBufferSizes* sizes) { + enabled_compute_uniform_buffer_mask = mask; + compute_uniform_buffer_sizes = sizes; } template @@ -984,7 +994,7 @@ void BufferCache

::BindHostGraphicsUniformBuffers(size_t stage) { dirty = std::exchange(dirty_uniform_buffers[stage], 0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { const bool needs_bind = ((dirty >> index) & 1) != 0; BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { @@ -998,7 +1008,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 bool needs_bind) { const Binding& binding = uniform_buffers[stage][index]; const VAddr cpu_addr = binding.cpu_addr; - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && @@ -1113,11 +1123,11 @@ void BufferCache

::BindHostComputeUniformBuffers() { dirty_uniform_buffers.fill(~u32{0}); } u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); @@ -1261,7 +1271,7 @@ void BufferCache

::UpdateVertexBuffer(u32 index) { template void BufferCache

::UpdateUniformBuffers(size_t stage) { - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { Binding& binding = uniform_buffers[stage][index]; if (binding.buffer_id) { // Already updated @@ -1334,7 +1344,7 @@ void BufferCache

::UpdateTransformFeedbackBuffer(u32 index) { template void BufferCache

::UpdateComputeUniformBuffers() { - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { Binding& binding = compute_uniform_buffers[index]; binding = NULL_BINDING; const auto& launch_desc = kepler_compute.launch_description; -- cgit v1.2.3 From 60a96c49e59e600685b9a79d80b2685318b4fb64 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 10 Jun 2021 02:24:12 -0300 Subject: buffer_cache: Fix copy based uniform bindings tracking --- src/video_core/buffer_cache/buffer_cache.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'src/video_core/buffer_cache/buffer_cache.h') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index ec64f2293..47cb0a47d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -680,6 +680,9 @@ void BufferCache

::SetUniformBuffersState(const std::array& m const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { if (enabled_uniform_buffer_masks != mask) { + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers.fill(0); + } dirty_uniform_buffers.fill(~u32{0}); } } @@ -1020,6 +1023,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // Fast path for Nvidia if (!HasFastUniformBufferBound(stage, binding_index)) { // We only have to bind when the currently bound buffer is not the fast version + fast_bound_uniform_buffers[stage] |= 1U << binding_index; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -1027,8 +1031,9 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 return; } } - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers[stage] |= 1U << binding_index; + } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span span = runtime.BindMappedUniformBuffer(stage, binding_index, size); cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); @@ -1046,9 +1051,15 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // This exists to avoid instances where the fast buffer is bound and a GPU write happens return; } - fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); - const u32 offset = buffer.Offset(cpu_addr); + if constexpr (IS_OPENGL) { + // Fast buffer will be unbound + fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); + + // Mark the index as dirty if offset doesn't match + const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); + dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; + } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { -- cgit v1.2.3 From 94e751f415d70fe255eada77c4385ec966c07a95 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 14 Jun 2021 04:32:45 -0300 Subject: buffer_cache: Invalidate fast buffers on compute --- src/video_core/buffer_cache/buffer_cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/buffer_cache/buffer_cache.h') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 47cb0a47d..d004199ba 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1132,6 +1132,7 @@ void BufferCache

::BindHostComputeUniformBuffers() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { // Mark all uniform buffers as dirty dirty_uniform_buffers.fill(~u32{0}); + fast_bound_uniform_buffers.fill(0); } u32 binding_index = 0; ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { -- cgit v1.2.3 From 838d7e4ca59b79dc9a8dd727a12dfba00e73242c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 18 Jun 2021 03:22:00 -0300 Subject: buffer_cache: Fix size reductions not having in mind bind sizes A buffer binding can change between shaders without changing the shaders. This lead to outdated bindings on OpenGL. --- src/video_core/buffer_cache/buffer_cache.h | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'src/video_core/buffer_cache/buffer_cache.h') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d004199ba..a75de4384 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -405,8 +405,6 @@ private: u32 written_compute_texture_buffers = 0; u32 image_compute_texture_buffers = 0; - std::array fast_bound_uniform_buffers{}; - std::array uniform_cache_hits{}; std::array uniform_cache_shots{}; @@ -416,6 +414,10 @@ private: std::conditional_t, Empty> dirty_uniform_buffers{}; + std::conditional_t, Empty> fast_bound_uniform_buffers{}; + std::conditional_t, NUM_STAGES>, Empty> + uniform_buffer_binding_sizes{}; std::vector cached_write_buffer_ids; @@ -684,6 +686,7 @@ void BufferCache

::SetUniformBuffersState(const std::array& m fast_bound_uniform_buffers.fill(0); } dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } } enabled_uniform_buffer_masks = mask; @@ -1016,14 +1019,18 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && - !buffer.IsRegionGpuModified(cpu_addr, size); + !buffer.IsRegionGpuModified(cpu_addr, size) && false; if (use_fast_buffer) { if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia - if (!HasFastUniformBufferBound(stage, binding_index)) { + const bool should_fast_bind = + !HasFastUniformBufferBound(stage, binding_index) || + uniform_buffer_binding_sizes[stage][binding_index] != size; + if (should_fast_bind) { // We only have to bind when the currently bound buffer is not the fast version fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -1033,6 +1040,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } if constexpr (IS_OPENGL) { fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span span = runtime.BindMappedUniformBuffer(stage, binding_index, size); @@ -1046,9 +1054,13 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } ++uniform_cache_shots[0]; - if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { - // Skip binding if it's not needed and if the bound buffer is not the fast version - // This exists to avoid instances where the fast buffer is bound and a GPU write happens + // Skip binding if it's not needed and if the bound buffer is not the fast version + // This exists to avoid instances where the fast buffer is bound and a GPU write happens + needs_bind |= HasFastUniformBufferBound(stage, binding_index); + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size; + } + if (!needs_bind) { return; } const u32 offset = buffer.Offset(cpu_addr); @@ -1060,6 +1072,9 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; } + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + uniform_buffer_binding_sizes[stage][binding_index] = size; + } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { @@ -1725,6 +1740,7 @@ template void BufferCache

::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } auto& flags = maxwell3d.dirty.flags; flags[Dirty::IndexBuffer] = true; -- cgit v1.2.3 From df9b7e18f5c5bab84cc8c38214a0e1e9e9506bd4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 19 Jun 2021 17:28:46 -0300 Subject: buffer_cache: Fix debugging leftover --- src/video_core/buffer_cache/buffer_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/buffer_cache/buffer_cache.h') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index a75de4384..cb9c69baf 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1019,7 +1019,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && - !buffer.IsRegionGpuModified(cpu_addr, size) && false; + !buffer.IsRegionGpuModified(cpu_addr, size); if (use_fast_buffer) { if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { -- cgit v1.2.3