From a5a94f52ffcbf3119d272a9369021a213ea6dad2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 9 Feb 2022 15:00:05 +0100 Subject: MacroHLE: Add MultidrawIndirect HLE Macro. --- src/video_core/buffer_cache/buffer_cache.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index f1c60d1f3..99abe0edf 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -170,6 +170,9 @@ public: void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, bool is_written, bool is_image); + [[nodiscard]] std::pair ObtainBuffer(GPUVAddr gpu_addr, u32 size, + bool synchronize, bool mark_as_written); + void FlushCachedWrites(); /// Return true when there are uncommitted buffers to be downloaded @@ -790,6 +793,25 @@ void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } +template +std::pair BufferCache

::ObtainBuffer(GPUVAddr gpu_addr, u32 size, + bool synchronize, + bool mark_as_written) { + const std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + return {&slot_buffers[NULL_BUFFER_ID], 0}; + } + const BufferId buffer_id = FindBuffer(*cpu_addr, size); + Buffer& buffer = slot_buffers[buffer_id]; + if (synchronize) { + SynchronizeBuffer(buffer, *cpu_addr, size); + } + if (mark_as_written) { + MarkWrittenBuffer(buffer_id, *cpu_addr, size); + } + return {&buffer, buffer.Offset(*cpu_addr)}; +} + template void BufferCache

::FlushCachedWrites() { for (const BufferId buffer_id : cached_write_buffer_ids) { -- cgit v1.2.3 From 0f89828073a541eaa2cfd985483f839bd2f97b74 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 9 Feb 2022 15:39:40 +0100 Subject: MacroHLE: Implement DrawIndexedIndirect & DrawArraysIndirect. --- src/video_core/buffer_cache/buffer_cache.h | 160 ++++++++++++++++++--- src/video_core/dma_pusher.cpp | 3 +- src/video_core/dma_pusher.h | 1 + src/video_core/engines/draw_manager.cpp | 2 +- src/video_core/engines/draw_manager.h | 5 +- src/video_core/engines/maxwell_3d.cpp | 4 + src/video_core/engines/maxwell_3d.h | 12 +- src/video_core/macro/macro_hle.cpp | 45 +++--- src/video_core/rasterizer_interface.h | 2 +- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 4 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 48 ++++--- src/video_core/renderer_vulkan/vk_rasterizer.h | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 5 +- src/video_core/vulkan_common/vulkan_device.h | 1 + src/video_core/vulkan_common/vulkan_wrapper.cpp | 6 +- src/video_core/vulkan_common/vulkan_wrapper.h | 24 +++- 16 files changed, 252 insertions(+), 72 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 99abe0edf..557227b37 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -171,7 +171,9 @@ public: bool is_written, bool is_image); [[nodiscard]] std::pair ObtainBuffer(GPUVAddr gpu_addr, u32 size, - bool synchronize, bool mark_as_written); + bool synchronize = true, + bool mark_as_written = false, + bool discard_downloads = false); void FlushCachedWrites(); @@ -203,6 +205,14 @@ public: /// Return true when a CPU region is modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); + void SetDrawIndirect(const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { + current_draw_indirect = current_draw_indirect_; + } + + [[nodiscard]] std::pair GetDrawIndirectCount(); + + [[nodiscard]] std::pair GetDrawIndirectBuffer(); + std::mutex mutex; Runtime& runtime; @@ -275,6 +285,8 @@ private: void BindHostVertexBuffers(); + void BindHostDrawIndirectBuffers(); + void BindHostGraphicsUniformBuffers(size_t stage); void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); @@ -301,6 +313,8 @@ private: void UpdateVertexBuffer(u32 index); + void UpdateDrawIndirect(); + void UpdateUniformBuffers(size_t stage); void UpdateStorageBuffers(size_t stage); @@ -340,6 +354,8 @@ private: bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); + bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size); + void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, std::span copies); @@ -375,6 +391,8 @@ private: SlotVector slot_buffers; DelayedDestructionRing delayed_destruction_ring; + const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{}; + u32 last_index_count = 0; Binding index_buffer; @@ -383,6 +401,8 @@ private: std::array, NUM_STAGES> storage_buffers; std::array, NUM_STAGES> texture_buffers; std::array transform_feedback_buffers; + Binding count_buffer_binding; + Binding indirect_buffer_binding; std::array compute_uniform_buffers; std::array compute_storage_buffers; @@ -422,6 +442,7 @@ private: std::vector cached_write_buffer_ids; + IntervalSet discarded_ranges; IntervalSet uncommitted_ranges; IntervalSet common_ranges; std::deque committed_ranges; @@ -579,13 +600,17 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am }}; boost::container::small_vector tmp_intervals; + const bool is_high_accuracy = + Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; auto mirror = [&](VAddr base_address, VAddr base_address_end) { const u64 size = base_address_end - base_address; const VAddr diff = base_address - *cpu_src_address; const VAddr new_base_address = *cpu_dest_address + diff; const IntervalType add_interval{new_base_address, new_base_address + size}; - uncommitted_ranges.add(add_interval); tmp_intervals.push_back(add_interval); + if (is_high_accuracy) { + uncommitted_ranges.add(add_interval); + } }; ForEachWrittenRange(*cpu_src_address, amount, mirror); // This subtraction in this order is important for overlapping copies. @@ -677,6 +702,9 @@ void BufferCache

::BindHostGeometryBuffers(bool is_indexed) { } BindHostVertexBuffers(); BindHostTransformFeedbackBuffers(); + if (current_draw_indirect) { + BindHostDrawIndirectBuffers(); + } } template @@ -796,7 +824,8 @@ void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add template std::pair BufferCache

::ObtainBuffer(GPUVAddr gpu_addr, u32 size, bool synchronize, - bool mark_as_written) { + bool mark_as_written, + bool discard_downloads) { const std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr) { return {&slot_buffers[NULL_BUFFER_ID], 0}; @@ -804,11 +833,17 @@ std::pair BufferCache

::ObtainBuffer(GPUVAddr gpu_ad const BufferId buffer_id = FindBuffer(*cpu_addr, size); Buffer& buffer = slot_buffers[buffer_id]; if (synchronize) { - SynchronizeBuffer(buffer, *cpu_addr, size); + // SynchronizeBuffer(buffer, *cpu_addr, size); + SynchronizeBufferNoModified(buffer, *cpu_addr, size); } if (mark_as_written) { MarkWrittenBuffer(buffer_id, *cpu_addr, size); } + if (discard_downloads) { + IntervalType interval{*cpu_addr, size}; + ClearDownload(interval); + discarded_ranges.subtract(interval); + } return {&buffer, buffer.Offset(*cpu_addr)}; } @@ -827,10 +862,6 @@ bool BufferCache

::HasUncommittedFlushes() const noexcept { template void BufferCache

::AccumulateFlushes() { - if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { - uncommitted_ranges.clear(); - return; - } if (uncommitted_ranges.empty()) { return; } @@ -845,12 +876,15 @@ bool BufferCache

::ShouldWaitAsyncFlushes() const noexcept { template void BufferCache

::CommitAsyncFlushesHigh() { AccumulateFlushes(); + + for (const auto& interval : discarded_ranges) { + common_ranges.subtract(interval); + } + if (committed_ranges.empty()) { return; } MICROPROFILE_SCOPE(GPU_DownloadMemory); - const bool is_accuracy_normal = - Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; auto it = committed_ranges.begin(); while (it != committed_ranges.end()) { @@ -875,9 +909,6 @@ void BufferCache

::CommitAsyncFlushesHigh() { ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { buffer.ForEachDownloadRangeAndClear( cpu_addr, size, [&](u64 range_offset, u64 range_size) { - if (is_accuracy_normal) { - return; - } const VAddr buffer_addr = buffer.CpuAddr(); const auto add_download = [&](VAddr start, VAddr end) { const u64 new_offset = start - buffer_addr; @@ -891,7 +922,7 @@ void BufferCache

::CommitAsyncFlushesHigh() { buffer_id, }); // Align up to avoid cache conflicts - constexpr u64 align = 256ULL; + constexpr u64 align = 8ULL; constexpr u64 mask = ~(align - 1ULL); total_size_bytes += (new_size + align - 1) & mask; largest_copy = std::max(largest_copy, new_size); @@ -942,12 +973,7 @@ void BufferCache

::CommitAsyncFlushesHigh() { template void BufferCache

::CommitAsyncFlushes() { - if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { - CommitAsyncFlushesHigh(); - } else { - uncommitted_ranges.clear(); - committed_ranges.clear(); - } + CommitAsyncFlushesHigh(); } template @@ -1063,6 +1089,19 @@ void BufferCache

::BindHostVertexBuffers() { } } +template +void BufferCache

::BindHostDrawIndirectBuffers() { + const auto bind_buffer = [this](const Binding& binding) { + Buffer& buffer = slot_buffers[binding.buffer_id]; + TouchBuffer(buffer, binding.buffer_id); + SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); + }; + if (current_draw_indirect->include_count) { + bind_buffer(count_buffer_binding); + } + bind_buffer(indirect_buffer_binding); +} + template void BufferCache

::BindHostGraphicsUniformBuffers(size_t stage) { u32 dirty = ~0U; @@ -1294,6 +1333,9 @@ void BufferCache

::DoUpdateGraphicsBuffers(bool is_indexed) { UpdateStorageBuffers(stage); UpdateTextureBuffers(stage); } + if (current_draw_indirect) { + UpdateDrawIndirect(); + } } while (has_deleted_buffers); } @@ -1383,6 +1425,27 @@ void BufferCache

::UpdateVertexBuffer(u32 index) { }; } +template +void BufferCache

::UpdateDrawIndirect() { + const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) { + const std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + binding = NULL_BINDING; + return; + } + binding = Binding{ + .cpu_addr = *cpu_addr, + .size = static_cast(size), + .buffer_id = FindBuffer(*cpu_addr, static_cast(size)), + }; + }; + if (current_draw_indirect->include_count) { + update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding); + } + update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size, + indirect_buffer_binding); +} + template void BufferCache

::UpdateUniformBuffers(size_t stage) { ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { @@ -1704,6 +1767,51 @@ bool BufferCache

::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s return false; } +template +bool BufferCache

::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) { + boost::container::small_vector copies; + u64 total_size_bytes = 0; + u64 largest_copy = 0; + IntervalSet found_sets{}; + auto make_copies = [&] { + for (auto& interval : found_sets) { + const std::size_t sub_size = interval.upper() - interval.lower(); + const VAddr cpu_addr = interval.lower(); + copies.push_back(BufferCopy{ + .src_offset = total_size_bytes, + .dst_offset = cpu_addr - buffer.CpuAddr(), + .size = sub_size, + }); + total_size_bytes += sub_size; + largest_copy = std::max(largest_copy, sub_size); + } + const std::span copies_span(copies.data(), copies.size()); + UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); + }; + buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { + const VAddr base_adr = buffer.CpuAddr() + range_offset; + const VAddr end_adr = base_adr + range_size; + const IntervalType add_interval{base_adr, end_adr}; + found_sets.add(add_interval); + }); + if (found_sets.empty()) { + return true; + } + const IntervalType search_interval{cpu_addr, cpu_addr + size}; + auto it = common_ranges.lower_bound(search_interval); + auto it_end = common_ranges.upper_bound(search_interval); + if (it == common_ranges.end()) { + make_copies(); + return false; + } + while (it != it_end) { + found_sets.subtract(*it); + it++; + } + make_copies(); + return false; +} + template void BufferCache

::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, std::span copies) { @@ -1963,4 +2071,16 @@ bool BufferCache

::HasFastUniformBufferBound(size_t stage, u32 binding_index) } } +template +std::pair::Buffer*, u32> BufferCache

::GetDrawIndirectCount() { + auto& buffer = slot_buffers[count_buffer_binding.buffer_id]; + return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr)); +} + +template +std::pair::Buffer*, u32> BufferCache

::GetDrawIndirectBuffer() { + auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id]; + return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr)); +} + } // namespace VideoCommon diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index eb1371612..13ff64fa3 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -97,6 +97,7 @@ void DmaPusher::ProcessCommands(std::span commands) { if (dma_state.non_incrementing) { const u32 max_write = static_cast( std::min(index + dma_state.method_count, commands.size()) - index); + dma_state.dma_word_offset = static_cast(index * sizeof(u32)); CallMultiMethod(&command_header.argument, max_write); dma_state.method_count -= max_write; dma_state.is_last_call = true; @@ -175,7 +176,7 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { dma_state.method_count); } else { auto subchannel = subchannels[dma_state.subchannel]; - subchannel->current_dma_segment = dma_state.dma_get; + subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, dma_state.method_count); } diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index ca0899ba7..da7728ded 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -157,6 +157,7 @@ private: u32 method_count; ///< Current method count u32 length_pending; ///< Large NI command length pending GPUVAddr dma_get; ///< Currently read segment + u32 dma_word_offset; ///< Current word ofset from address bool non_incrementing; ///< Current command's NI flag bool is_last_call; }; diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 4fa77b684..c60f32aad 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -216,7 +216,7 @@ void DrawManager::ProcessDrawIndirect(bool draw_indexed) { UpdateTopology(); if (maxwell3d->ShouldExecute()) { - maxwell3d->rasterizer->DrawIndirect(draw_indexed); + maxwell3d->rasterizer->DrawIndirect(); } } } // namespace Tegra::Engines diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 0cdb37f83..437990162 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -33,7 +33,10 @@ public: }; struct IndirectParams { - GPUVAddr start_address; + bool is_indexed; + bool include_count; + GPUVAddr count_start_address; + GPUVAddr indirect_start_address; size_t buffer_size; size_t max_draw_counts; size_t stride; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 9b182b653..cd6274a9b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -130,11 +130,15 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool } macro_params.insert(macro_params.end(), base_start, base_start + amount); + for (size_t i = 0; i < amount; i++) { + macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); + } // Call the macro when there are no more parameters in the command buffer if (is_last_call) { CallMacroMethod(executing_macro, macro_params); macro_params.clear(); + macro_addresses.clear(); } } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 22b904319..ac5e87563 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3066,6 +3066,15 @@ public: std::unique_ptr draw_manager; friend class DrawManager; + + std::vector inline_index_draw_indexes; + std::vector macro_addresses; + + Core::System& system; + MemoryManager& memory_manager; + + /// Handles a write to the CLEAR_BUFFERS register. + void ProcessClearBuffers(u32 layer_count); private: void InitializeRegisterDefaults(); @@ -3126,9 +3135,6 @@ private: /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); - Core::System& system; - MemoryManager& memory_manager; - VideoCore::RasterizerInterface* rasterizer = nullptr; /// Start offsets of each macro in macro_memory diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 1cc202cc7..da988cc0d 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -9,6 +9,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" namespace Tegra { @@ -24,15 +25,14 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector& parameters[4], parameters[1], parameters[3], parameters[5], instance_count); } -void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { +void HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); maxwell3d.draw_manager->DrawArray( static_cast(parameters[0]), parameters[3], parameters[1], parameters[4], instance_count); } -void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { - const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); +void HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { const u32 element_base = parameters[4]; const u32 base_instance = parameters[5]; maxwell3d.regs.vertex_id_base = element_base; @@ -41,9 +41,18 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector& maxwell3d.CallMethod(0x8e4, element_base, true); maxwell3d.CallMethod(0x8e5, base_instance, true); - maxwell3d.draw_manager->DrawIndex( - static_cast(parameters[0]), - parameters[3], parameters[1], element_base, base_instance, instance_count); + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_indexed = true; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.macro_addresses[1]; + params.buffer_size = 5 * sizeof(u32); + params.max_draw_counts = 1; + params.stride = 0; + + maxwell3d.draw_manager->DrawIndexedIndirect( + static_cast(parameters[0]), 0, + 1U << 18); maxwell3d.regs.vertex_id_base = 0x0; maxwell3d.CallMethod(0x8e3, 0x640, true); @@ -51,8 +60,9 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector& maxwell3d.CallMethod(0x8e5, 0x0, true); } -// Multidraw Indirect -void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { +// Multidraw Indixed Indirect +void HLE_MultiDrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, + const std::vector& parameters) { const u32 start_indirect = parameters[0]; const u32 end_indirect = parameters[1]; if (start_indirect >= end_indirect) { @@ -66,7 +76,6 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector& // size of each indirect segment const u32 indirect_words = 5 + padding; const u32 stride = indirect_words * sizeof(u32); - const GPUVAddr start_address = maxwell3d.current_dma_segment + 4 * sizeof(u32); const std::size_t draw_count = end_indirect - start_indirect; u32 lowest_first = std::numeric_limits::max(); u32 highest_limit = std::numeric_limits::min(); @@ -80,12 +89,16 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector& const u32 base_vertex = parameters[8]; const u32 base_instance = parameters[9]; + maxwell3d.regs.vertex_id_base = base_vertex; maxwell3d.CallMethod(0x8e3, 0x640, true); maxwell3d.CallMethod(0x8e4, base_vertex, true); maxwell3d.CallMethod(0x8e5, base_instance, true); auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.start_address = start_address; - params.buffer_size = sizeof(u32) + stride * draw_count; + params.is_indexed = true; + params.include_count = true; + params.count_start_address = maxwell3d.macro_addresses[4]; + params.indirect_start_address = maxwell3d.macro_addresses[5]; + params.buffer_size = stride * draw_count; params.max_draw_counts = draw_count; params.stride = stride; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; @@ -93,7 +106,7 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector& } // Multi-layer Clear -void HLE_EAD26C3E2109B06B(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { +void HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { ASSERT(parameters.size() == 1); const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; @@ -107,10 +120,10 @@ void HLE_EAD26C3E2109B06B(Engines::Maxwell3D& maxwell3d, const std::vector& constexpr std::array, 5> hle_funcs{{ {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, - {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD}, - {0x0217920100488FF7, &HLE_0217920100488FF7}, - {0x3F5E74B9C9A50164, &HLE_3F5E74B9C9A50164}, - {0xEAD26C3E2109B06B, &HLE_EAD26C3E2109B06B}, + {0x0D61FC9FAAC9FCAD, &HLE_DrawArraysIndirect}, + {0x0217920100488FF7, &HLE_DrawIndexedIndirect}, + {0x3F5E74B9C9A50164, &HLE_MultiDrawIndexedIndirect}, + {0xEAD26C3E2109B06B, &HLE_MultiLayerClear}, }}; class HLEMacroImpl final : public CachedMacro { diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index a2a651f34..641b95c7c 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -43,7 +43,7 @@ public: virtual void Draw(bool is_indexed, u32 instance_count) = 0; /// Dispatches an indirect draw invocation - virtual void DrawIndirect(bool is_indexed) {} + virtual void DrawIndirect() {} /// Clear the current framebuffer virtual void Clear(u32 layer_count) = 0; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 6b54d7111..487d8b416 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -56,7 +56,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) { VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; if (device.IsExtTransformFeedbackSupported()) { flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; } @@ -516,6 +517,7 @@ void BufferCacheRuntime::ReserveNullBuffer() { if (device.IsExtTransformFeedbackSupported()) { create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; } + create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; null_buffer = device.GetLogical().CreateBuffer(create_info); if (device.HasDebuggingToolAttached()) { null_buffer.SetObjectNameEXT("Null buffer"); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9b75f33dd..6f1adc97f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -225,25 +225,40 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { }); } -void RasterizerVulkan::DrawIndirect(bool is_indexed) { - PrepareDraw(is_indexed, [this, is_indexed] { - const auto params = maxwell3d->draw_manager->GetIndirectParams(); - const auto [buffer, offset] = buffer_cache.ObtainBuffer( - params.start_address, static_cast(params.buffer_size), true, false); - scheduler.Record([buffer_obj = buffer->Handle(), offset, - max_draw_counts = params.max_draw_counts, stride = params.stride, - is_indexed](vk::CommandBuffer cmdbuf) { - if (is_indexed) { - cmdbuf.DrawIndexedIndirectCount(buffer_obj, offset + 4ULL, buffer_obj, offset, - static_cast(max_draw_counts), - static_cast(stride)); +void RasterizerVulkan::DrawIndirect() { + const auto& params = maxwell3d->draw_manager->GetIndirectParams(); + buffer_cache.SetDrawIndirect(¶ms); + PrepareDraw(params.is_indexed, [this, ¶ms] { + const auto [buffer, offset] = buffer_cache.GetDrawIndirectBuffer(); + if (params.include_count) { + const auto [draw_buffer, offset_base] = buffer_cache.GetDrawIndirectCount(); + scheduler.Record([draw_buffer_obj = draw_buffer->Handle(), + buffer_obj = buffer->Handle(), offset_base, offset, + params](vk::CommandBuffer cmdbuf) { + if (params.is_indexed) { + cmdbuf.DrawIndexedIndirectCount( + buffer_obj, offset, draw_buffer_obj, offset_base, + static_cast(params.max_draw_counts), static_cast(params.stride)); + } else { + cmdbuf.DrawIndirectCount(buffer_obj, offset, draw_buffer_obj, offset_base, + static_cast(params.max_draw_counts), + static_cast(params.stride)); + } + }); + return; + } + scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) { + if (params.is_indexed) { + cmdbuf.DrawIndexedIndirect(buffer_obj, offset, + static_cast(params.max_draw_counts), + static_cast(params.stride)); } else { - cmdbuf.DrawIndirectCount(buffer_obj, offset + 4ULL, buffer_obj, offset, - static_cast(max_draw_counts), - static_cast(stride)); + cmdbuf.DrawIndirect(buffer_obj, offset, static_cast(params.max_draw_counts), + static_cast(params.stride)); } }); }); + buffer_cache.SetDrawIndirect(nullptr); } void RasterizerVulkan::Clear(u32 layer_count) { @@ -425,9 +440,6 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; - if (!Settings::IsGPULevelHigh()) { - return buffer_cache.IsRegionGpuModified(addr, size); - } return texture_cache.IsRegionGpuModified(addr, size) || buffer_cache.IsRegionGpuModified(addr, size); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index bc43a8a1f..43a210da0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -65,7 +65,7 @@ public: ~RasterizerVulkan() override; void Draw(bool is_indexed, u32 instance_count) override; - void DrawIndirect(bool is_indexed) override; + void DrawIndirect() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 477fc428b..207fae8c9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -351,7 +351,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .dualSrcBlend = true, .logicOp = true, .multiDrawIndirect = true, - .drawIndirectFirstInstance = false, + .drawIndirectFirstInstance = true, .depthClamp = true, .depthBiasClamp = true, .fillModeNonSolid = true, @@ -1024,6 +1024,8 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, "independentBlend"), + std::make_pair(features.multiDrawIndirect, "multiDrawIndirect"), + std::make_pair(features.drawIndirectFirstInstance, "drawIndirectFirstInstance"), std::make_pair(features.depthClamp, "depthClamp"), std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), std::make_pair(features.largePoints, "largePoints"), @@ -1117,6 +1119,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); + test(khr_draw_indirect_count, VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME, true); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); test(has_ext_primitive_topology_list_restart, diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 6a26c4e6e..d0d7c2299 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -451,6 +451,7 @@ private: bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. + bool khr_draw_indirect_count{}; ///< Support for VK_KHR_draw_indirect_count. bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index c58c4c1c4..f8f8ed9f8 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -94,8 +94,10 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdDispatch); X(vkCmdDraw); X(vkCmdDrawIndexed); - X(vkCmdDrawIndirectCount); - X(vkCmdDrawIndexedIndirectCount); + X(vkCmdDrawIndirect); + X(vkCmdDrawIndexedIndirect); + X(vkCmdDrawIndirectCountKHR); + X(vkCmdDrawIndexedIndirectCountKHR); X(vkCmdEndQuery); X(vkCmdEndRenderPass); X(vkCmdEndTransformFeedbackEXT); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 9bd158dce..493a48573 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -213,8 +213,10 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdDispatch vkCmdDispatch{}; PFN_vkCmdDraw vkCmdDraw{}; PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; - PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{}; - PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{}; + PFN_vkCmdDrawIndirect vkCmdDrawIndirect{}; + PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{}; + PFN_vkCmdDrawIndirectCountKHR vkCmdDrawIndirectCountKHR{}; + PFN_vkCmdDrawIndexedIndirectCountKHR vkCmdDrawIndexedIndirectCountKHR{}; PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdEndQuery vkCmdEndQuery{}; PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; @@ -1021,17 +1023,27 @@ public: first_instance); } + void DrawIndirect(VkBuffer src_buffer, VkDeviceSize src_offset, u32 draw_count, + u32 stride) const noexcept { + dld->vkCmdDrawIndirect(handle, src_buffer, src_offset, draw_count, stride); + } + + void DrawIndexedIndirect(VkBuffer src_buffer, VkDeviceSize src_offset, u32 draw_count, + u32 stride) const noexcept { + dld->vkCmdDrawIndexedIndirect(handle, src_buffer, src_offset, draw_count, stride); + } + void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept { - dld->vkCmdDrawIndirectCount(handle, src_buffer, src_offset, count_buffer, count_offset, - draw_count, stride); + dld->vkCmdDrawIndirectCountKHR(handle, src_buffer, src_offset, count_buffer, count_offset, + draw_count, stride); } void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept { - dld->vkCmdDrawIndexedIndirectCount(handle, src_buffer, src_offset, count_buffer, - count_offset, draw_count, stride); + dld->vkCmdDrawIndexedIndirectCountKHR(handle, src_buffer, src_offset, count_buffer, + count_offset, draw_count, stride); } void ClearAttachments(Span attachments, -- cgit v1.2.3 From c541559767c3912940ee3d73a122530b3edde9f1 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 5 Mar 2022 08:01:13 +0100 Subject: MacroHLE: Refactor MacroHLE system. --- src/video_core/buffer_cache/buffer_cache.h | 6 +- src/video_core/dma_pusher.cpp | 9 +- src/video_core/dma_pusher.h | 2 +- src/video_core/engines/draw_manager.cpp | 6 + src/video_core/engines/draw_manager.h | 2 + src/video_core/engines/maxwell_3d.cpp | 37 +++ src/video_core/engines/maxwell_3d.h | 21 +- src/video_core/macro/macro.cpp | 7 +- src/video_core/macro/macro.h | 1 + src/video_core/macro/macro_hle.cpp | 445 ++++++++++++++++++++++------- src/video_core/macro/macro_hle.h | 5 + 11 files changed, 420 insertions(+), 121 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 557227b37..98343628c 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1574,7 +1574,11 @@ void BufferCache

::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s if (!is_async) { return; } - uncommitted_ranges.add(base_interval); + const bool is_high_accuracy = + Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; + if (is_high_accuracy) { + uncommitted_ranges.add(base_interval); + } } template diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 13ff64fa3..5ad40abaa 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -94,10 +94,10 @@ void DmaPusher::ProcessCommands(std::span commands) { if (dma_state.method_count) { // Data word of methods command + dma_state.dma_word_offset = static_cast(index * sizeof(u32)); if (dma_state.non_incrementing) { const u32 max_write = static_cast( std::min(index + dma_state.method_count, commands.size()) - index); - dma_state.dma_word_offset = static_cast(index * sizeof(u32)); CallMultiMethod(&command_header.argument, max_write); dma_state.method_count -= max_write; dma_state.is_last_call = true; @@ -133,6 +133,8 @@ void DmaPusher::ProcessCommands(std::span commands) { case SubmissionMode::Inline: dma_state.method = command_header.method; dma_state.subchannel = command_header.subchannel; + dma_state.dma_word_offset = static_cast( + -static_cast(dma_state.dma_get)); // negate to set address as 0 CallMethod(command_header.arg_count); dma_state.non_incrementing = true; dma_increment_once = false; @@ -165,8 +167,9 @@ void DmaPusher::CallMethod(u32 argument) const { dma_state.method_count, }); } else { - subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument, - dma_state.is_last_call); + auto subchannel = subchannels[dma_state.subchannel]; + subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; + subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call); } } diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index da7728ded..1cdb690ed 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -157,7 +157,7 @@ private: u32 method_count; ///< Current method count u32 length_pending; ///< Large NI command length pending GPUVAddr dma_get; ///< Currently read segment - u32 dma_word_offset; ///< Current word ofset from address + u64 dma_word_offset; ///< Current word ofset from address bool non_incrementing; ///< Current command's NI flag bool is_last_call; }; diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index c60f32aad..183d5403c 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -91,6 +91,12 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind ProcessDraw(true, num_instances); } +void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) { + draw_state.topology = topology; + + ProcessDrawIndirect(true); +} + void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) { const auto& regs{maxwell3d->regs}; draw_state.topology = topology; diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 437990162..49a4fca48 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -56,6 +56,8 @@ public: void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, u32 base_instance, u32 num_instances); + void DrawArrayIndirect(PrimitiveTopology topology); + void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count); const State& GetDrawState() const { diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index cd6274a9b..b998a8e69 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -133,15 +133,52 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool for (size_t i = 0; i < amount; i++) { macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); } + macro_segments.emplace_back(current_dma_segment, amount); // Call the macro when there are no more parameters in the command buffer if (is_last_call) { CallMacroMethod(executing_macro, macro_params); macro_params.clear(); macro_addresses.clear(); + macro_segments.clear(); } } +void Maxwell3D::RefreshParameters() { + size_t current_index = 0; + for (auto& segment : macro_segments) { + if (segment.first == 0) { + current_index += segment.second; + continue; + } + memory_manager.ReadBlock(segment.first, ¯o_params[current_index], + sizeof(u32) * segment.second); + current_index += segment.second; + } +} + +u32 Maxwell3D::GetMaxCurrentVertices() { + u32 num_vertices = 0; + for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { + const auto& array = regs.vertex_streams[index]; + if (array.enable == 0) { + continue; + } + const auto& attribute = regs.vertex_attrib_format[index]; + if (attribute.constant) { + num_vertices = std::max(num_vertices, 1U); + continue; + } + const auto& limit = regs.vertex_stream_limits[index]; + const GPUVAddr gpu_addr_begin = array.Address(); + const GPUVAddr gpu_addr_end = limit.Address() + 1; + const u32 address_size = static_cast(gpu_addr_end - gpu_addr_begin); + num_vertices = std::max( + num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value())); + } + return num_vertices; +} + u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { // Keep track of the register value in shadow_state when requested. const auto control = shadow_state.shadow_ram_control; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ac5e87563..e2256594d 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3068,10 +3068,14 @@ public: friend class DrawManager; std::vector inline_index_draw_indexes; - std::vector macro_addresses; - Core::System& system; - MemoryManager& memory_manager; + GPUVAddr getMacroAddress(size_t index) const { + return macro_addresses[index]; + } + + void RefreshParameters(); + + u32 GetMaxCurrentVertices(); /// Handles a write to the CLEAR_BUFFERS register. void ProcessClearBuffers(u32 layer_count); @@ -3135,6 +3139,9 @@ private: /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); + Core::System& system; + MemoryManager& memory_manager; + VideoCore::RasterizerInterface* rasterizer = nullptr; /// Start offsets of each macro in macro_memory @@ -3151,6 +3158,14 @@ private: Upload::State upload_state; bool execute_on{true}; + + std::array draw_command{}; + std::vector deferred_draw_method; + enum class DrawMode : u32 { General = 0, Instance, InlineIndex }; + DrawMode draw_mode{DrawMode::General}; + bool draw_indexed{}; + std::vector> macro_segments; + std::vector macro_addresses; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index 505d81c1e..01dd25f95 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -13,6 +13,7 @@ #include "common/fs/fs.h" #include "common/fs/path_util.h" #include "common/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" #include "video_core/macro/macro_interpreter.h" @@ -40,8 +41,8 @@ static void Dump(u64 hash, std::span code) { macro_file.write(reinterpret_cast(code.data()), code.size_bytes()); } -MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) - : hle_macros{std::make_unique(maxwell3d)} {} +MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_) + : hle_macros{std::make_unique(maxwell3d_)}, maxwell3d{maxwell3d_} {} MacroEngine::~MacroEngine() = default; @@ -61,6 +62,7 @@ void MacroEngine::Execute(u32 method, const std::vector& parameters) { if (cache_info.has_hle_program) { cache_info.hle_program->Execute(parameters, method); } else { + maxwell3d.RefreshParameters(); cache_info.lle_program->Execute(parameters, method); } } else { @@ -106,6 +108,7 @@ void MacroEngine::Execute(u32 method, const std::vector& parameters) { cache_info.hle_program = std::move(hle_program); cache_info.hle_program->Execute(parameters, method); } else { + maxwell3d.RefreshParameters(); cache_info.lle_program->Execute(parameters, method); } } diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index 07d97ba39..737ced9a4 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -137,6 +137,7 @@ private: std::unordered_map macro_cache; std::unordered_map> uploaded_macro_code; std::unique_ptr hle_macros; + Engines::Maxwell3D& maxwell3d; }; std::unique_ptr GetMacroEngine(Engines::Maxwell3D& maxwell3d); diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index da988cc0d..79fab96e1 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -3,6 +3,7 @@ #include #include +#include "common/assert.h" #include "common/scope_exit.h" #include "video_core/dirty_flags.h" #include "video_core/engines/draw_manager.h" @@ -15,143 +16,365 @@ namespace Tegra { namespace { -using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector& parameters); - -// HLE'd functions -void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { - const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); - maxwell3d.draw_manager->DrawIndex( - static_cast(parameters[0] & 0x3ffffff), - parameters[4], parameters[1], parameters[3], parameters[5], instance_count); +bool IsTopologySafe(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { + switch (topology) { + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineLoop: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Patches: + return true; + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Quads: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::QuadStrip: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Polygon: + default: + return false; + } } -void HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { - const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); - maxwell3d.draw_manager->DrawArray( - static_cast(parameters[0]), - parameters[3], parameters[1], parameters[4], instance_count); -} +class HLEMacroImpl : public CachedMacro { +public: + explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} -void HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { - const u32 element_base = parameters[4]; - const u32 base_instance = parameters[5]; - maxwell3d.regs.vertex_id_base = element_base; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, element_base, true); - maxwell3d.CallMethod(0x8e5, base_instance, true); - - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_indexed = true; - params.include_count = false; - params.count_start_address = 0; - params.indirect_start_address = maxwell3d.macro_addresses[1]; - params.buffer_size = 5 * sizeof(u32); - params.max_draw_counts = 1; - params.stride = 0; - - maxwell3d.draw_manager->DrawIndexedIndirect( - static_cast(parameters[0]), 0, - 1U << 18); - - maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, 0x0, true); - maxwell3d.CallMethod(0x8e5, 0x0, true); -} +protected: + void advanceCheck() { + current_value = (current_value + 1) % fibonacci_post; + check_limit = current_value == 0; + if (check_limit) { + const u32 new_fibonacci = fibonacci_pre + fibonacci_post; + fibonacci_pre = fibonacci_post; + fibonacci_post = new_fibonacci; + } + } + + Engines::Maxwell3D& maxwell3d; + u32 fibonacci_pre{89}; + u32 fibonacci_post{144}; + u32 current_value{fibonacci_post - 1}; + bool check_limit{}; +}; + +class HLE_771BB18C62444DA0 final : public HLEMacroImpl { +public: + explicit HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} -// Multidraw Indixed Indirect -void HLE_MultiDrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, - const std::vector& parameters) { - const u32 start_indirect = parameters[0]; - const u32 end_indirect = parameters[1]; - if (start_indirect >= end_indirect) { - // Nothing to do. - return; + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.draw_manager->DrawIndex( + static_cast(parameters[0] & + 0x3ffffff), + parameters[4], parameters[1], parameters[3], parameters[5], instance_count); } - const auto topology = - static_cast(parameters[2]); - const u32 padding = parameters[3]; // padding is in words - - // size of each indirect segment - const u32 indirect_words = 5 + padding; - const u32 stride = indirect_words * sizeof(u32); - const std::size_t draw_count = end_indirect - start_indirect; - u32 lowest_first = std::numeric_limits::max(); - u32 highest_limit = std::numeric_limits::min(); - for (std::size_t index = 0; index < draw_count; index++) { - const std::size_t base = index * indirect_words + 5; - const u32 count = parameters[base]; - const u32 first_index = parameters[base + 2]; - lowest_first = std::min(lowest_first, first_index); - highest_limit = std::max(highest_limit, first_index + count); +}; + +class HLE_DrawArraysIndirect final : public HLEMacroImpl { +public: + explicit HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d_, bool extended_ = false) + : HLEMacroImpl(maxwell3d_), extended(extended_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + auto topology = + static_cast(parameters[0]); + if (!IsTopologySafe(topology)) { + Fallback(parameters); + return; + } + + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_indexed = false; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.getMacroAddress(1); + params.buffer_size = 4 * sizeof(u32); + params.max_draw_counts = 1; + params.stride = 0; + + if (extended) { + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, parameters[4], true); + } + + maxwell3d.draw_manager->DrawArrayIndirect(topology); + + if (extended) { + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0, true); + } + maxwell3d.regs.vertex_buffer.first = 0; + maxwell3d.regs.vertex_buffer.count = 0; } - const u32 base_vertex = parameters[8]; - const u32 base_instance = parameters[9]; - maxwell3d.regs.vertex_id_base = base_vertex; - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, base_vertex, true); - maxwell3d.CallMethod(0x8e5, base_instance, true); - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_indexed = true; - params.include_count = true; - params.count_start_address = maxwell3d.macro_addresses[4]; - params.indirect_start_address = maxwell3d.macro_addresses[5]; - params.buffer_size = stride * draw_count; - params.max_draw_counts = draw_count; - params.stride = stride; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit); -} +private: + void Fallback(const std::vector& parameters) { + SCOPE_EXIT({ + if (extended) { + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0, true); + } + }); + maxwell3d.RefreshParameters(); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); -// Multi-layer Clear -void HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { - ASSERT(parameters.size() == 1); + const u32 vertex_first = parameters[3]; + const u32 vertex_count = parameters[1]; - const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; - const u32 rt_index = clear_params.RT; - const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; - ASSERT(clear_params.layer == 0); + if (maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { + ASSERT_MSG(false, "Faulty draw!"); + return; + } - maxwell3d.regs.clear_surface.raw = clear_params.raw; - maxwell3d.draw_manager->Clear(num_layers); -} + const u32 base_instance = parameters[4]; + if (extended) { + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, base_instance, true); + } + + maxwell3d.draw_manager->DrawArray( + static_cast(parameters[0]), + vertex_first, vertex_count, base_instance, instance_count); + } -constexpr std::array, 5> hle_funcs{{ - {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, - {0x0D61FC9FAAC9FCAD, &HLE_DrawArraysIndirect}, - {0x0217920100488FF7, &HLE_DrawIndexedIndirect}, - {0x3F5E74B9C9A50164, &HLE_MultiDrawIndexedIndirect}, - {0xEAD26C3E2109B06B, &HLE_MultiLayerClear}, -}}; + bool extended; +}; -class HLEMacroImpl final : public CachedMacro { +class HLE_DrawIndexedIndirect final : public HLEMacroImpl { public: - explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) - : maxwell3d{maxwell3d_}, func{func_} {} + explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - void Execute(const std::vector& parameters, u32 method) override { - func(maxwell3d, parameters); + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + auto topology = + static_cast(parameters[0]); + if (!IsTopologySafe(topology)) { + Fallback(parameters); + return; + } + + advanceCheck(); + if (check_limit) { + maxwell3d.RefreshParameters(); + minimum_limit = std::max(parameters[3], minimum_limit); + } + + const u32 base_vertex = parameters[8]; + const u32 base_instance = parameters[9]; + maxwell3d.regs.vertex_id_base = base_vertex; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, base_vertex, true); + maxwell3d.CallMethod(0x8e5, base_instance, true); + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_indexed = true; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.getMacroAddress(1); + params.buffer_size = 5 * sizeof(u32); + params.max_draw_counts = 1; + params.stride = 0; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, minimum_limit); + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0x0, true); + maxwell3d.CallMethod(0x8e5, 0x0, true); } private: - Engines::Maxwell3D& maxwell3d; - HLEFunction func; + void Fallback(const std::vector& parameters) { + maxwell3d.RefreshParameters(); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.vertex_id_base = element_base; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, element_base, true); + maxwell3d.CallMethod(0x8e5, base_instance, true); + + maxwell3d.draw_manager->DrawIndex( + static_cast(parameters[0]), + parameters[3], parameters[1], element_base, base_instance, instance_count); + + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0x0, true); + maxwell3d.CallMethod(0x8e5, 0x0, true); + } + + u32 minimum_limit{1 << 18}; +}; + +class HLE_MultiLayerClear final : public HLEMacroImpl { +public: + explicit HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + ASSERT(parameters.size() == 1); + + const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; + const u32 rt_index = clear_params.RT; + const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; + ASSERT(clear_params.layer == 0); + + maxwell3d.regs.clear_surface.raw = clear_params.raw; + maxwell3d.draw_manager->Clear(num_layers); + } +}; + +class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl { +public: + explicit HLE_MultiDrawIndexedIndirectCount(Engines::Maxwell3D& maxwell3d_) + : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + const auto topology = + static_cast(parameters[2]); + if (!IsTopologySafe(topology)) { + Fallback(parameters); + return; + } + + advanceCheck(); + if (check_limit) { + maxwell3d.RefreshParameters(); + } + const u32 start_indirect = parameters[0]; + const u32 end_indirect = parameters[1]; + if (start_indirect >= end_indirect) { + // Nothing to do. + return; + } + + maxwell3d.regs.draw.topology.Assign(topology); + const u32 padding = parameters[3]; // padding is in words + + // size of each indirect segment + const u32 indirect_words = 5 + padding; + const u32 stride = indirect_words * sizeof(u32); + const std::size_t draw_count = end_indirect - start_indirect; + u32 lowest_first = std::numeric_limits::max(); + u32 highest_limit = std::numeric_limits::min(); + for (std::size_t index = 0; index < draw_count; index++) { + const std::size_t base = index * indirect_words + 5; + const u32 count = parameters[base]; + const u32 first_index = parameters[base + 2]; + lowest_first = std::min(lowest_first, first_index); + highest_limit = std::max(highest_limit, first_index + count); + } + if (check_limit) { + minimum_limit = std::max(highest_limit, minimum_limit); + } + + maxwell3d.regs.index_buffer.first = 0; + maxwell3d.regs.index_buffer.count = std::max(highest_limit, minimum_limit); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_indexed = true; + params.include_count = true; + params.count_start_address = maxwell3d.getMacroAddress(4); + params.indirect_start_address = maxwell3d.getMacroAddress(5); + params.buffer_size = stride * draw_count; + params.max_draw_counts = draw_count; + params.stride = stride; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit); + } + +private: + void Fallback(const std::vector& parameters) { + SCOPE_EXIT({ + // Clean everything. + // Clean everything. + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0x0, true); + maxwell3d.CallMethod(0x8e5, 0x0, true); + }); + maxwell3d.RefreshParameters(); + const u32 start_indirect = parameters[0]; + const u32 end_indirect = parameters[1]; + if (start_indirect >= end_indirect) { + // Nothing to do. + return; + } + const auto topology = + static_cast(parameters[2]); + maxwell3d.regs.draw.topology.Assign(topology); + const u32 padding = parameters[3]; + const std::size_t max_draws = parameters[4]; + + const u32 indirect_words = 5 + padding; + const std::size_t first_draw = start_indirect; + const std::size_t effective_draws = end_indirect - start_indirect; + const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws); + + for (std::size_t index = first_draw; index < last_draw; index++) { + const std::size_t base = index * indirect_words + 5; + const u32 base_vertex = parameters[base + 3]; + const u32 base_instance = parameters[base + 4]; + maxwell3d.regs.vertex_id_base = base_vertex; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, base_vertex, true); + maxwell3d.CallMethod(0x8e5, base_instance, true); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], + base_vertex, base_instance, parameters[base + 1]); + } + } + + u32 minimum_limit{1 << 12}; }; } // Anonymous namespace -HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} +HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { + builders.emplace(0x771BB18C62444DA0ULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); + builders.emplace(0x0D61FC9FAAC9FCADULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); + builders.emplace(0x8A4D173EB99A8603ULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d, true); + })); + builders.emplace(0x0217920100488FF7ULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); + builders.emplace(0x3F5E74B9C9A50164ULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); + builders.emplace(0xEAD26C3E2109B06BULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); +} + HLEMacro::~HLEMacro() = default; std::unique_ptr HLEMacro::GetHLEProgram(u64 hash) const { - const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), - [hash](const auto& pair) { return pair.first == hash; }); - if (it == hle_funcs.end()) { + const auto it = builders.find(hash); + if (it == builders.end()) { return nullptr; } - return std::make_unique(maxwell3d, it->second); + return it->second(maxwell3d); } } // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h index 625332c9d..33f92fab1 100644 --- a/src/video_core/macro/macro_hle.h +++ b/src/video_core/macro/macro_hle.h @@ -3,7 +3,10 @@ #pragma once +#include #include +#include + #include "common/common_types.h" namespace Tegra { @@ -23,6 +26,8 @@ public: private: Engines::Maxwell3D& maxwell3d; + std::unordered_map(Engines::Maxwell3D&)>> + builders; }; } // namespace Tegra -- cgit v1.2.3 From ce448ce770b6c329caec7ad1ae00e01dddb67b03 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 18 Nov 2022 00:21:13 +0100 Subject: Revert Buffer cache changes and setup additional macros. --- src/video_core/buffer_cache/buffer_cache.h | 113 +++++--------------------- src/video_core/dma_pusher.cpp | 17 +++- src/video_core/engines/engine_interface.h | 1 + src/video_core/engines/maxwell_3d.cpp | 26 +++--- src/video_core/engines/maxwell_3d.h | 23 ++++-- src/video_core/macro/macro.cpp | 5 ++ src/video_core/macro/macro_hle.cpp | 122 +++++++++++++++++++++++++++-- 7 files changed, 179 insertions(+), 128 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 98343628c..f86edaa3e 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -170,11 +170,6 @@ public: void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, bool is_written, bool is_image); - [[nodiscard]] std::pair ObtainBuffer(GPUVAddr gpu_addr, u32 size, - bool synchronize = true, - bool mark_as_written = false, - bool discard_downloads = false); - void FlushCachedWrites(); /// Return true when there are uncommitted buffers to be downloaded @@ -354,8 +349,6 @@ private: bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); - bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size); - void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, std::span copies); @@ -442,7 +435,6 @@ private: std::vector cached_write_buffer_ids; - IntervalSet discarded_ranges; IntervalSet uncommitted_ranges; IntervalSet common_ranges; std::deque committed_ranges; @@ -600,17 +592,13 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am }}; boost::container::small_vector tmp_intervals; - const bool is_high_accuracy = - Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; auto mirror = [&](VAddr base_address, VAddr base_address_end) { const u64 size = base_address_end - base_address; const VAddr diff = base_address - *cpu_src_address; const VAddr new_base_address = *cpu_dest_address + diff; const IntervalType add_interval{new_base_address, new_base_address + size}; + uncommitted_ranges.add(add_interval); tmp_intervals.push_back(add_interval); - if (is_high_accuracy) { - uncommitted_ranges.add(add_interval); - } }; ForEachWrittenRange(*cpu_src_address, amount, mirror); // This subtraction in this order is important for overlapping copies. @@ -821,32 +809,6 @@ void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } -template -std::pair BufferCache

::ObtainBuffer(GPUVAddr gpu_addr, u32 size, - bool synchronize, - bool mark_as_written, - bool discard_downloads) { - const std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - return {&slot_buffers[NULL_BUFFER_ID], 0}; - } - const BufferId buffer_id = FindBuffer(*cpu_addr, size); - Buffer& buffer = slot_buffers[buffer_id]; - if (synchronize) { - // SynchronizeBuffer(buffer, *cpu_addr, size); - SynchronizeBufferNoModified(buffer, *cpu_addr, size); - } - if (mark_as_written) { - MarkWrittenBuffer(buffer_id, *cpu_addr, size); - } - if (discard_downloads) { - IntervalType interval{*cpu_addr, size}; - ClearDownload(interval); - discarded_ranges.subtract(interval); - } - return {&buffer, buffer.Offset(*cpu_addr)}; -} - template void BufferCache

::FlushCachedWrites() { for (const BufferId buffer_id : cached_write_buffer_ids) { @@ -862,6 +824,10 @@ bool BufferCache

::HasUncommittedFlushes() const noexcept { template void BufferCache

::AccumulateFlushes() { + if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { + uncommitted_ranges.clear(); + return; + } if (uncommitted_ranges.empty()) { return; } @@ -877,14 +843,12 @@ template void BufferCache

::CommitAsyncFlushesHigh() { AccumulateFlushes(); - for (const auto& interval : discarded_ranges) { - common_ranges.subtract(interval); - } - if (committed_ranges.empty()) { return; } MICROPROFILE_SCOPE(GPU_DownloadMemory); + const bool is_accuracy_normal = + Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; auto it = committed_ranges.begin(); while (it != committed_ranges.end()) { @@ -909,6 +873,9 @@ void BufferCache

::CommitAsyncFlushesHigh() { ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { buffer.ForEachDownloadRangeAndClear( cpu_addr, size, [&](u64 range_offset, u64 range_size) { + if (is_accuracy_normal) { + return; + } const VAddr buffer_addr = buffer.CpuAddr(); const auto add_download = [&](VAddr start, VAddr end) { const u64 new_offset = start - buffer_addr; @@ -973,7 +940,12 @@ void BufferCache

::CommitAsyncFlushesHigh() { template void BufferCache

::CommitAsyncFlushes() { - CommitAsyncFlushesHigh(); + if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { + CommitAsyncFlushesHigh(); + } else { + uncommitted_ranges.clear(); + committed_ranges.clear(); + } } template @@ -1353,7 +1325,7 @@ void BufferCache

::UpdateIndexBuffer() { const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); const auto& index_array = draw_state.index_buffer; auto& flags = maxwell3d->dirty.flags; - if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) { + if (!flags[Dirty::IndexBuffer]) { return; } flags[Dirty::IndexBuffer] = false; @@ -1574,11 +1546,7 @@ void BufferCache

::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s if (!is_async) { return; } - const bool is_high_accuracy = - Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; - if (is_high_accuracy) { - uncommitted_ranges.add(base_interval); - } + uncommitted_ranges.add(base_interval); } template @@ -1771,51 +1739,6 @@ bool BufferCache

::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s return false; } -template -bool BufferCache

::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) { - boost::container::small_vector copies; - u64 total_size_bytes = 0; - u64 largest_copy = 0; - IntervalSet found_sets{}; - auto make_copies = [&] { - for (auto& interval : found_sets) { - const std::size_t sub_size = interval.upper() - interval.lower(); - const VAddr cpu_addr = interval.lower(); - copies.push_back(BufferCopy{ - .src_offset = total_size_bytes, - .dst_offset = cpu_addr - buffer.CpuAddr(), - .size = sub_size, - }); - total_size_bytes += sub_size; - largest_copy = std::max(largest_copy, sub_size); - } - const std::span copies_span(copies.data(), copies.size()); - UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); - }; - buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { - const VAddr base_adr = buffer.CpuAddr() + range_offset; - const VAddr end_adr = base_adr + range_size; - const IntervalType add_interval{base_adr, end_adr}; - found_sets.add(add_interval); - }); - if (found_sets.empty()) { - return true; - } - const IntervalType search_interval{cpu_addr, cpu_addr + size}; - auto it = common_ranges.lower_bound(search_interval); - auto it_end = common_ranges.upper_bound(search_interval); - if (it == common_ranges.end()) { - make_copies(); - return false; - } - while (it != it_end) { - found_sets.subtract(*it); - it++; - } - make_copies(); - return false; -} - template void BufferCache

::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, std::span copies) { diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 7a82355da..b3e9cb82e 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -77,11 +77,20 @@ bool DmaPusher::Step() { command_headers.resize_destructive(command_list_header.size); constexpr u32 MacroRegistersStart = 0xE00; if (dma_state.method < MacroRegistersStart) { - memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), - command_list_header.size * sizeof(u32)); + if (Settings::IsGPULevelHigh()) { + memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), + command_list_header.size * sizeof(u32)); + } else { + memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), + command_list_header.size * sizeof(u32)); + } } else { - memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), - command_list_header.size * sizeof(u32)); + const size_t copy_size = command_list_header.size * sizeof(u32); + if (subchannels[dma_state.subchannel]) { + subchannels[dma_state.subchannel]->current_dirty = + memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size); + } + memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size); } ProcessCommands(command_headers); } diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index 76630272d..38f1abdc4 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -18,6 +18,7 @@ public: virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) = 0; + bool current_dirty{}; GPUVAddr current_dma_segment; }; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a9fd6d960..bbe3202fe 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,6 +4,7 @@ #include #include #include "common/assert.h" +#include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" #include "video_core/dirty_flags.h" @@ -14,6 +15,7 @@ #include "video_core/rasterizer_interface.h" #include "video_core/textures/texture.h" + namespace Tegra::Engines { using VideoCore::QueryType; @@ -134,6 +136,8 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); } macro_segments.emplace_back(current_dma_segment, amount); + current_macro_dirty |= current_dirty; + current_dirty = false; // Call the macro when there are no more parameters in the command buffer if (is_last_call) { @@ -141,10 +145,14 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool macro_params.clear(); macro_addresses.clear(); macro_segments.clear(); + current_macro_dirty = false; } } -void Maxwell3D::RefreshParameters() { +void Maxwell3D::RefreshParametersImpl() { + if (!Settings::IsGPULevelHigh()) { + return; + } size_t current_index = 0; for (auto& segment : macro_segments) { if (segment.first == 0) { @@ -157,21 +165,6 @@ void Maxwell3D::RefreshParameters() { } } -bool Maxwell3D::AnyParametersDirty() { - size_t current_index = 0; - for (auto& segment : macro_segments) { - if (segment.first == 0) { - current_index += segment.second; - continue; - } - if (memory_manager.IsMemoryDirty(segment.first, sizeof(u32) * segment.second)) { - return true; - } - current_index += segment.second; - } - return false; -} - u32 Maxwell3D::GetMaxCurrentVertices() { u32 num_vertices = 0; for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { @@ -332,7 +325,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { const u32 argument = ProcessShadowRam(method, method_argument); ProcessDirtyRegisters(method, argument); - ProcessMethodCall(method, argument, method_argument, is_last_call); } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cd996413c..f0a379801 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -272,6 +272,7 @@ public: }; union { + u32 raw; BitField<0, 1, Mode> mode; BitField<4, 8, u32> pad; }; @@ -1217,10 +1218,12 @@ public: struct Window { union { + u32 raw_1; BitField<0, 16, u32> x_min; BitField<16, 16, u32> x_max; }; union { + u32 raw_2; BitField<0, 16, u32> y_min; BitField<16, 16, u32> y_max; }; @@ -3090,9 +3093,16 @@ public: return macro_addresses[index]; } - void RefreshParameters(); + void RefreshParameters() { + if (!current_macro_dirty) { + return; + } + RefreshParametersImpl(); + } - bool AnyParametersDirty(); + bool AnyParametersDirty() { + return current_macro_dirty; + } u32 GetMaxCurrentVertices(); @@ -3101,6 +3111,9 @@ public: /// Handles a write to the CLEAR_BUFFERS register. void ProcessClearBuffers(u32 layer_count); + /// Handles a write to the CB_BIND register. + void ProcessCBBind(size_t stage_index); + private: void InitializeRegisterDefaults(); @@ -3154,12 +3167,11 @@ private: void ProcessCBData(u32 value); void ProcessCBMultiData(const u32* start_base, u32 amount); - /// Handles a write to the CB_BIND register. - void ProcessCBBind(size_t stage_index); - /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); + void RefreshParametersImpl(); + Core::System& system; MemoryManager& memory_manager; @@ -3187,6 +3199,7 @@ private: bool draw_indexed{}; std::vector> macro_segments; std::vector macro_addresses; + bool current_macro_dirty{}; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index 01dd25f95..49c47dafe 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -12,6 +12,7 @@ #include "common/assert.h" #include "common/fs/fs.h" #include "common/fs/path_util.h" +#include "common/microprofile.h" #include "common/settings.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" @@ -22,6 +23,8 @@ #include "video_core/macro/macro_jit_x64.h" #endif +MICROPROFILE_DEFINE(MacroHLE, "GPU", "Execute macro hle", MP_RGB(128, 192, 192)); + namespace Tegra { static void Dump(u64 hash, std::span code) { @@ -60,6 +63,7 @@ void MacroEngine::Execute(u32 method, const std::vector& parameters) { if (compiled_macro != macro_cache.end()) { const auto& cache_info = compiled_macro->second; if (cache_info.has_hle_program) { + MICROPROFILE_SCOPE(MacroHLE); cache_info.hle_program->Execute(parameters, method); } else { maxwell3d.RefreshParameters(); @@ -106,6 +110,7 @@ void MacroEngine::Execute(u32 method, const std::vector& parameters) { if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { cache_info.has_hle_program = true; cache_info.hle_program = std::move(hle_program); + MICROPROFILE_SCOPE(MacroHLE); cache_info.hle_program->Execute(parameters, method); } else { maxwell3d.RefreshParameters(); diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 638247e55..3eac50975 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -86,7 +86,7 @@ public: void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { auto topology = static_cast(parameters[0]); - if (!IsTopologySafe(topology)) { + if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { Fallback(parameters); return; } @@ -117,8 +117,8 @@ private: void Fallback(const std::vector& parameters) { SCOPE_EXIT({ if (extended) { - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, 0, true); + maxwell3d.engine_state = Maxwell::EngineHint::None; + maxwell3d.replace_table.clear(); } }); maxwell3d.RefreshParameters(); @@ -127,7 +127,8 @@ private: const u32 vertex_first = parameters[3]; const u32 vertex_count = parameters[1]; - if (maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { + if (maxwell3d.AnyParametersDirty() && + maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { ASSERT_MSG(false, "Faulty draw!"); return; } @@ -157,7 +158,7 @@ public: void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { auto topology = static_cast(parameters[0]); - if (!IsTopologySafe(topology)) { + if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { Fallback(parameters); return; } @@ -169,7 +170,11 @@ public: } const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); const u32 base_size = std::max(minimum_limit, estimate); - maxwell3d.regs.draw.topology.Assign(topology); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.vertex_id_base = element_base; + maxwell3d.regs.global_base_vertex_index = element_base; + maxwell3d.regs.global_base_instance_index = base_instance; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); @@ -186,6 +191,9 @@ public: maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); maxwell3d.engine_state = Maxwell::EngineHint::None; maxwell3d.replace_table.clear(); + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.regs.global_base_vertex_index = 0x0; + maxwell3d.regs.global_base_instance_index = 0x0; } private: @@ -195,6 +203,8 @@ private: const u32 element_base = parameters[4]; const u32 base_instance = parameters[5]; maxwell3d.regs.vertex_id_base = element_base; + maxwell3d.regs.global_base_vertex_index = element_base; + maxwell3d.regs.global_base_instance_index = base_instance; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); @@ -205,6 +215,8 @@ private: parameters[3], parameters[1], element_base, base_instance, instance_count); maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.regs.global_base_vertex_index = 0x0; + maxwell3d.regs.global_base_instance_index = 0x0; maxwell3d.engine_state = Maxwell::EngineHint::None; maxwell3d.replace_table.clear(); } @@ -253,7 +265,6 @@ public: return; } - maxwell3d.regs.draw.topology.Assign(topology); const u32 padding = parameters[3]; // padding is in words // size of each indirect segment @@ -335,6 +346,83 @@ private: u32 minimum_limit{1 << 12}; }; +class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { +public: + explicit HLE_C713C83D8F63CCF3(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; + const u32 address = maxwell3d.regs.shadow_scratch[24]; + auto& const_buffer = maxwell3d.regs.const_buffer; + const_buffer.size = 0x7000; + const_buffer.address_high = (address >> 24) & 0xFF; + const_buffer.address_low = address << 8; + const_buffer.offset = offset; + } +}; + +class HLE_D7333D26E0A93EDE final : public HLEMacroImpl { +public: + explicit HLE_D7333D26E0A93EDE(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + const size_t index = parameters[0]; + const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; + const u32 size = maxwell3d.regs.shadow_scratch[47 + index]; + auto& const_buffer = maxwell3d.regs.const_buffer; + const_buffer.size = size; + const_buffer.address_high = (address >> 24) & 0xFF; + const_buffer.address_low = address << 8; + } +}; + +class HLE_BindShader final : public HLEMacroImpl { +public: + explicit HLE_BindShader(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + auto& regs = maxwell3d.regs; + const u32 index = parameters[0]; + if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) { + return; + } + + regs.pipelines[index & 0xF].offset = parameters[2]; + maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true; + regs.shadow_scratch[28 + index] = parameters[1]; + regs.shadow_scratch[34 + index] = parameters[2]; + + const u32 address = parameters[4]; + auto& const_buffer = regs.const_buffer; + const_buffer.size = 0x10000; + const_buffer.address_high = (address >> 24) & 0xFF; + const_buffer.address_low = address << 8; + + const size_t bind_group_id = parameters[3] & 0x7F; + auto& bind_group = regs.bind_groups[bind_group_id]; + bind_group.raw_config = 0x11; + maxwell3d.ProcessCBBind(bind_group_id); + } +}; + +class HLE_SetRasterBoundingBox final : public HLEMacroImpl { +public: + explicit HLE_SetRasterBoundingBox(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + const u32 raster_mode = parameters[0]; + auto& regs = maxwell3d.regs; + const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable; + const u32 scratch_data = maxwell3d.regs.shadow_scratch[52]; + regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; + regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); + } +}; + } // Anonymous namespace HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { @@ -368,6 +456,26 @@ HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { return std::make_unique(maxwell3d); })); + builders.emplace(0xC713C83D8F63CCF3ULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); + builders.emplace(0xD7333D26E0A93EDEULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); + builders.emplace(0xEB29B2A09AA06D38ULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); + builders.emplace(0xDB1341DBEB4C8AF7ULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); } HLEMacro::~HLEMacro() = default; -- cgit v1.2.3 From 3630bfaef332768e08ecc0c34cd4bca83a2579f8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 20 Nov 2022 03:07:14 +0100 Subject: RasterizerMemory: Add filtering for flushing/invalidation operations. --- src/video_core/CMakeLists.txt | 1 + src/video_core/buffer_cache/buffer_cache.h | 2 +- src/video_core/cache_types.h | 24 +++++++++ src/video_core/memory_manager.cpp | 60 +++++++++++++---------- src/video_core/memory_manager.h | 25 +++++++--- src/video_core/rasterizer_interface.h | 13 +++-- src/video_core/renderer_null/null_rasterizer.cpp | 8 +-- src/video_core/renderer_null/null_rasterizer.h | 12 +++-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 51 ++++++++++++------- src/video_core/renderer_opengl/gl_rasterizer.h | 13 +++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 52 +++++++++++++------- src/video_core/renderer_vulkan/vk_rasterizer.h | 13 +++-- src/video_core/texture_cache/texture_cache.h | 3 +- src/video_core/texture_cache/texture_cache_base.h | 2 +- 14 files changed, 186 insertions(+), 93 deletions(-) create mode 100644 src/video_core/cache_types.h (limited to 'src/video_core/buffer_cache') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index fd71bf186..aa271a377 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -13,6 +13,7 @@ add_library(video_core STATIC buffer_cache/buffer_base.h buffer_cache/buffer_cache.cpp buffer_cache/buffer_cache.h + cache_types.h cdma_pusher.cpp cdma_pusher.h compatible_formats.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index f86edaa3e..bdc0681b7 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -208,7 +208,7 @@ public: [[nodiscard]] std::pair GetDrawIndirectBuffer(); - std::mutex mutex; + std::recursive_mutex mutex; Runtime& runtime; private: diff --git a/src/video_core/cache_types.h b/src/video_core/cache_types.h new file mode 100644 index 000000000..1a5db3c55 --- /dev/null +++ b/src/video_core/cache_types.h @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace VideoCommon { + +enum class CacheType : u32 { + None = 0, + TextureCache = 1 << 0, + QueryCache = 1 << 1, + BufferCache = 1 << 2, + ShaderCache = 1 << 3, + NoTextureCache = QueryCache | BufferCache | ShaderCache, + NoBufferCache = TextureCache | QueryCache | ShaderCache, + NoQueryCache = TextureCache | BufferCache | ShaderCache, + All = TextureCache | QueryCache | BufferCache | ShaderCache, +}; +DECLARE_ENUM_FLAG_OPERATORS(CacheType) + +} // namespace VideoCommon diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 4fcae9909..3a5cdeb39 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -356,8 +356,8 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si } template -void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, - std::size_t size) const { +void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, + [[maybe_unused]] VideoCommon::CacheType which) const { auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, std::size_t copy_amount) { std::memset(dest_buffer, 0, copy_amount); @@ -367,7 +367,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; if constexpr (is_safe) { - rasterizer->FlushRegion(cpu_addr_base, copy_amount); + rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } u8* physical = memory.GetPointer(cpu_addr_base); std::memcpy(dest_buffer, physical, copy_amount); @@ -377,7 +377,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; if constexpr (is_safe) { - rasterizer->FlushRegion(cpu_addr_base, copy_amount); + rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } if (!IsBigPageContinous(page_index)) [[unlikely]] { memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); @@ -395,18 +395,19 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, MemoryOperation(gpu_src_addr, size, mapped_big, set_to_zero, read_short_pages); } -void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const { - ReadBlockImpl(gpu_src_addr, dest_buffer, size); +void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, + VideoCommon::CacheType which) const { + ReadBlockImpl(gpu_src_addr, dest_buffer, size, which); } void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, const std::size_t size) const { - ReadBlockImpl(gpu_src_addr, dest_buffer, size); + ReadBlockImpl(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); } template -void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, - std::size_t size) { +void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, + [[maybe_unused]] VideoCommon::CacheType which) { auto just_advance = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, std::size_t copy_amount) { src_buffer = static_cast(src_buffer) + copy_amount; @@ -415,7 +416,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; if constexpr (is_safe) { - rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); } u8* physical = memory.GetPointer(cpu_addr_base); std::memcpy(physical, src_buffer, copy_amount); @@ -425,7 +426,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; if constexpr (is_safe) { - rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); } if (!IsBigPageContinous(page_index)) [[unlikely]] { memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount); @@ -443,16 +444,18 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe MemoryOperation(gpu_dest_addr, size, mapped_big, just_advance, write_short_pages); } -void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) { - WriteBlockImpl(gpu_dest_addr, src_buffer, size); +void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, + VideoCommon::CacheType which) { + WriteBlockImpl(gpu_dest_addr, src_buffer, size, which); } void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) { - WriteBlockImpl(gpu_dest_addr, src_buffer, size); + WriteBlockImpl(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); } -void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { +void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which) const { auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, [[maybe_unused]] std::size_t copy_amount) {}; @@ -460,12 +463,12 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; - rasterizer->FlushRegion(cpu_addr_base, copy_amount); + rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; - rasterizer->FlushRegion(cpu_addr_base, copy_amount); + rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); }; auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -475,7 +478,8 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { MemoryOperation(gpu_addr, size, mapped_big, do_nothing, flush_short_pages); } -bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const { +bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which) const { bool result = false; auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, @@ -484,13 +488,13 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const { auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; - result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount); + result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); return result; }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; - result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount); + result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); return result; }; auto check_short_pages = [&](std::size_t page_index, std::size_t offset, @@ -547,7 +551,8 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) co return kind_map.GetContinousSizeFrom(gpu_addr); } -void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { +void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which) const { auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, [[maybe_unused]] std::size_t copy_amount) {}; @@ -555,12 +560,12 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; - rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; - rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); }; auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -570,14 +575,15 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { MemoryOperation(gpu_addr, size, mapped_big, do_nothing, invalidate_short_pages); } -void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) { +void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, + VideoCommon::CacheType which) { std::vector tmp_buffer(size); - ReadBlock(gpu_src_addr, tmp_buffer.data(), size); + ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); // The output block must be flushed in case it has data modified from the GPU. // Fixes NPC geometry in Zombie Panic in Wonderland DX - FlushRegion(gpu_dest_addr, size); - WriteBlock(gpu_dest_addr, tmp_buffer.data(), size); + FlushRegion(gpu_dest_addr, size, which); + WriteBlock(gpu_dest_addr, tmp_buffer.data(), size, which); } bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 50043a8ae..828e13439 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -12,6 +12,7 @@ #include "common/multi_level_page_table.h" #include "common/range_map.h" #include "common/virtual_buffer.h" +#include "video_core/cache_types.h" #include "video_core/pte_kind.h" namespace VideoCore { @@ -60,9 +61,12 @@ public: * in the Host Memory counterpart. Note: This functions cause Host GPU Memory * Flushes and Invalidations, respectively to each operation. */ - void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; - void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); - void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size); + void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) const; + void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, + VideoCommon::CacheType which = VideoCommon::CacheType::All); + void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, + VideoCommon::CacheType which = VideoCommon::CacheType::All); /** * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and @@ -105,11 +109,14 @@ public: GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); void Unmap(GPUVAddr gpu_addr, std::size_t size); - void FlushRegion(GPUVAddr gpu_addr, size_t size) const; + void FlushRegion(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) const; - void InvalidateRegion(GPUVAddr gpu_addr, size_t size) const; + void InvalidateRegion(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) const; - bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const; + bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) const; size_t MaxContinousRange(GPUVAddr gpu_addr, size_t size) const; @@ -128,10 +135,12 @@ private: FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const; template - void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; + void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, + VideoCommon::CacheType which) const; template - void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); + void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, + VideoCommon::CacheType which); template [[nodiscard]] std::size_t PageEntryIndex(GPUVAddr gpu_addr) const { diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 641b95c7c..6d8d2b666 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -8,6 +8,7 @@ #include #include "common/common_types.h" #include "common/polyfill_thread.h" +#include "video_core/cache_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" @@ -83,13 +84,16 @@ public: virtual void FlushAll() = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - virtual void FlushRegion(VAddr addr, u64 size) = 0; + virtual void FlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; /// Check if the the specified memory area requires flushing to CPU Memory. - virtual bool MustFlushRegion(VAddr addr, u64 size) = 0; + virtual bool MustFlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(VAddr addr, u64 size) = 0; + virtual void InvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; /// Notify rasterizer that any caches of the specified region are desync with guest virtual void OnCPUWrite(VAddr addr, u64 size) = 0; @@ -105,7 +109,8 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated - virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; + virtual void FlushAndInvalidateRegion( + VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; /// Notify the host renderer to wait for previous primitive and compute operations. virtual void WaitForIdle() = 0; diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 9734d84bc..2c11345d7 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -39,11 +39,11 @@ void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr u32 size) {} void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {} void RasterizerNull::FlushAll() {} -void RasterizerNull::FlushRegion(VAddr addr, u64 size) {} -bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size) { +void RasterizerNull::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} +bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) { return false; } -void RasterizerNull::InvalidateRegion(VAddr addr, u64 size) {} +void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {} void RasterizerNull::InvalidateGPUCache() {} void RasterizerNull::UnmapMemory(VAddr addr, u64 size) {} @@ -61,7 +61,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) { } void RasterizerNull::SignalReference() {} void RasterizerNull::ReleaseFences() {} -void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size) {} +void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} void RasterizerNull::WaitForIdle() {} void RasterizerNull::FragmentBarrier() {} void RasterizerNull::TiledCacheBarrier() {} diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index ecf77ba42..2112aa70e 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -38,9 +38,12 @@ public: void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; - void FlushRegion(VAddr addr, u64 size) override; - bool MustFlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + bool MustFlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + void InvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void OnCPUWrite(VAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; @@ -50,7 +53,8 @@ public: void SignalSyncPoint(u32 value) override; void SignalReference() override; void ReleaseFences() override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion( + VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void WaitForIdle() override; void FragmentBarrier() override; void TiledCacheBarrier() override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0807d0b88..d58dcedea 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -352,46 +352,60 @@ void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) { void RasterizerOpenGL::FlushAll() {} -void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); if (addr == 0 || size == 0) { return; } - { + if (bool(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.DownloadMemory(addr, size); } - { + if ((bool(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.DownloadMemory(addr, size); } - query_cache.FlushRegion(addr, size); + if ((bool(which & VideoCommon::CacheType::QueryCache))) { + query_cache.FlushRegion(addr, size); + } } -bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; +bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { + if ((bool(which & VideoCommon::CacheType::BufferCache))) { + std::scoped_lock lock{buffer_cache.mutex}; + if (buffer_cache.IsRegionGpuModified(addr, size)) { + return true; + } + } if (!Settings::IsGPULevelHigh()) { - return buffer_cache.IsRegionGpuModified(addr, size); + return false; + } + if (bool(which & VideoCommon::CacheType::TextureCache)) { + std::scoped_lock lock{texture_cache.mutex}; + return texture_cache.IsRegionGpuModified(addr, size); } - return texture_cache.IsRegionGpuModified(addr, size) || - buffer_cache.IsRegionGpuModified(addr, size); + return false; } -void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); if (addr == 0 || size == 0) { return; } - { + if (bool(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); } - { + if (bool(which & VideoCommon::CacheType::BufferCache)) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.WriteMemory(addr, size); } - shader_cache.InvalidateRegion(addr, size); - query_cache.InvalidateRegion(addr, size); + if (bool(which & VideoCommon::CacheType::ShaderCache)) { + shader_cache.InvalidateRegion(addr, size); + } + if (bool(which & VideoCommon::CacheType::QueryCache)) { + query_cache.InvalidateRegion(addr, size); + } } void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { @@ -458,11 +472,12 @@ void RasterizerOpenGL::ReleaseFences() { fence_manager.WaitPendingFences(); } -void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which) { if (Settings::IsGPULevelExtreme()) { - FlushRegion(addr, size); + FlushRegion(addr, size, which); } - InvalidateRegion(addr, size); + InvalidateRegion(addr, size, which); } void RasterizerOpenGL::WaitForIdle() { @@ -531,7 +546,7 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si } gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size); { - std::unique_lock lock{buffer_cache.mutex}; + std::unique_lock lock{buffer_cache.mutex}; if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { buffer_cache.WriteMemory(*cpu_addr, copy_size); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index efd19f880..94e65d64b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -77,9 +77,12 @@ public: void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; - void FlushRegion(VAddr addr, u64 size) override; - bool MustFlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + bool MustFlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + void InvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void OnCPUWrite(VAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; @@ -89,7 +92,9 @@ public: void SignalSyncPoint(u32 value) override; void SignalReference() override; void ReleaseFences() override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion( + VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void WaitForIdle() override; void FragmentBarrier() override; void TiledCacheBarrier() override; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 143af93c5..463c49f9c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -423,41 +423,58 @@ void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 in void RasterizerVulkan::FlushAll() {} -void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { +void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { if (addr == 0 || size == 0) { return; } - { + if (bool(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.DownloadMemory(addr, size); } - { + if ((bool(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.DownloadMemory(addr, size); } - query_cache.FlushRegion(addr, size); + if ((bool(which & VideoCommon::CacheType::QueryCache))) { + query_cache.FlushRegion(addr, size); + } } -bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { - std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; - return texture_cache.IsRegionGpuModified(addr, size) || - buffer_cache.IsRegionGpuModified(addr, size); +bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { + if ((bool(which & VideoCommon::CacheType::BufferCache))) { + std::scoped_lock lock{buffer_cache.mutex}; + if (buffer_cache.IsRegionGpuModified(addr, size)) { + return true; + } + } + if (!Settings::IsGPULevelHigh()) { + return false; + } + if (bool(which & VideoCommon::CacheType::TextureCache)) { + std::scoped_lock lock{texture_cache.mutex}; + return texture_cache.IsRegionGpuModified(addr, size); + } + return false; } -void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { +void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { if (addr == 0 || size == 0) { return; } - { + if (bool(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); } - { + if ((bool(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.WriteMemory(addr, size); } - pipeline_cache.InvalidateRegion(addr, size); - query_cache.InvalidateRegion(addr, size); + if ((bool(which & VideoCommon::CacheType::QueryCache))) { + query_cache.InvalidateRegion(addr, size); + } + if ((bool(which & VideoCommon::CacheType::ShaderCache))) { + pipeline_cache.InvalidateRegion(addr, size); + } } void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { @@ -522,11 +539,12 @@ void RasterizerVulkan::ReleaseFences() { fence_manager.WaitPendingFences(); } -void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { +void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which) { if (Settings::IsGPULevelExtreme()) { - FlushRegion(addr, size); + FlushRegion(addr, size, which); } - InvalidateRegion(addr, size); + InvalidateRegion(addr, size, which); } void RasterizerVulkan::WaitForIdle() { @@ -602,7 +620,7 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si } gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size); { - std::unique_lock lock{buffer_cache.mutex}; + std::unique_lock lock{buffer_cache.mutex}; if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { buffer_cache.WriteMemory(*cpu_addr, copy_size); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 839de6b26..82b28a54a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -73,9 +73,12 @@ public: void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; - void FlushRegion(VAddr addr, u64 size) override; - bool MustFlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + bool MustFlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + void InvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void OnCPUWrite(VAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; @@ -85,7 +88,9 @@ public: void SignalSyncPoint(u32 value) override; void SignalReference() override; void ReleaseFences() override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion( + VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void WaitForIdle() override; void FragmentBarrier() override; void TiledCacheBarrier() override; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 27c82cd20..7fe451b5a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -740,7 +740,8 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) const GPUVAddr gpu_addr = image.gpu_addr; if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { - gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + gpu_memory->ReadBlock(gpu_addr, mapped_span.data(), mapped_span.size_bytes(), + VideoCommon::CacheType::NoTextureCache); const auto uploads = FullUploadSwizzles(image.info); runtime.AccelerateImageUpload(image, staging, uploads); return; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4fd677a80..6b2898705 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -203,7 +203,7 @@ public: /// Create channel state. void CreateChannel(Tegra::Control::ChannelState& channel) final override; - std::mutex mutex; + std::recursive_mutex mutex; private: /// Iterate over all page indices in a range -- cgit v1.2.3 From d09aa0182f18d1ac338ab47009b42fdeb67497a8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 24 Dec 2022 19:19:41 -0500 Subject: MacroHLE: Final cleanup and fixes. --- src/common/range_map.h | 8 +- src/shader_recompiler/environment.h | 4 +- src/video_core/buffer_cache/buffer_cache.h | 3 +- src/video_core/engines/draw_manager.cpp | 3 +- src/video_core/engines/maxwell_3d.h | 2 +- src/video_core/macro/macro_hle.cpp | 98 +++++++--------------- src/video_core/renderer_opengl/gl_rasterizer.h | 3 +- .../renderer_vulkan/fixed_pipeline_state.h | 3 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 11 ++- src/video_core/renderer_vulkan/vk_rasterizer.h | 3 +- .../renderer_vulkan/vk_staging_buffer_pool.h | 5 +- src/video_core/vulkan_common/vulkan_device.cpp | 24 ++++-- src/video_core/vulkan_common/vulkan_device.h | 52 ++++++------ src/video_core/vulkan_common/vulkan_wrapper.h | 3 +- 14 files changed, 94 insertions(+), 128 deletions(-) (limited to 'src/video_core/buffer_cache') diff --git a/src/common/range_map.h b/src/common/range_map.h index 993e21643..051e713a7 100644 --- a/src/common/range_map.h +++ b/src/common/range_map.h @@ -13,8 +13,8 @@ namespace Common { template class RangeMap { private: - using KeyT = std::conditional_t, typename KeyTBase, - std::make_signed_t>; + using KeyT = + std::conditional_t, KeyTBase, std::make_signed_t>; public: explicit RangeMap(ValueT null_value_) : null_value{null_value_} { @@ -56,8 +56,8 @@ public: private: using MapType = std::map; - using IteratorType = MapType::iterator; - using ConstIteratorType = MapType::const_iterator; + using IteratorType = typename MapType::iterator; + using ConstIteratorType = typename MapType::const_iterator; size_t ContinousSizeInternal(KeyT address) const { const auto it = GetFirstElemnentBeforeOrOn(address); diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index b9b4455f6..8fc359126 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -36,8 +36,8 @@ public: [[nodiscard]] virtual bool HasHLEMacroState() const = 0; - [[nodiscard]] virtual std::optional GetReplaceConstBuffer( - u32 bank, u32 offset) = 0; + [[nodiscard]] virtual std::optional GetReplaceConstBuffer(u32 bank, + u32 offset) = 0; virtual void Dump(u64 hash) = 0; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index bdc0681b7..06fd40851 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -200,7 +200,8 @@ public: /// Return true when a CPU region is modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); - void SetDrawIndirect(const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { + void SetDrawIndirect( + const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { current_draw_indirect = current_draw_indirect_; } diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 183d5403c..feea89c0e 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -97,7 +97,8 @@ void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) { ProcessDrawIndirect(true); } -void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) { +void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, + u32 index_count) { const auto& regs{maxwell3d->regs}; draw_state.topology = topology; draw_state.index_buffer = regs.index_buffer; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 478ba4dc7..dbefcd715 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3086,7 +3086,7 @@ public: std::unique_ptr draw_manager; friend class DrawManager; - + std::vector inline_index_draw_indexes; GPUVAddr getMacroAddress(size_t index) const { diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 294a338d2..3481fcd41 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -47,21 +47,7 @@ public: explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} protected: - void advanceCheck() { - current_value = (current_value + 1) % fibonacci_post; - check_limit = current_value == 0; - if (check_limit) { - const u32 new_fibonacci = fibonacci_pre + fibonacci_post; - fibonacci_pre = fibonacci_post; - fibonacci_post = new_fibonacci; - } - } - Engines::Maxwell3D& maxwell3d; - u32 fibonacci_pre{89}; - u32 fibonacci_post{144}; - u32 current_value{fibonacci_post - 1}; - bool check_limit{}; }; class HLE_771BB18C62444DA0 final : public HLEMacroImpl { @@ -124,12 +110,13 @@ private: maxwell3d.RefreshParameters(); const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + auto topology = static_cast(parameters[0]); const u32 vertex_first = parameters[3]; const u32 vertex_count = parameters[1]; - - if (maxwell3d.AnyParametersDirty() && - maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { + if (!IsTopologySafe(topology) && + static_cast(maxwell3d.GetMaxCurrentVertices()) < + static_cast(vertex_first) + static_cast(vertex_count)) { ASSERT_MSG(false, "Faulty draw!"); return; } @@ -141,9 +128,8 @@ private: maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); } - maxwell3d.draw_manager->DrawArray( - static_cast(parameters[0]), - vertex_first, vertex_count, base_instance, instance_count); + maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, + instance_count); if (extended) { maxwell3d.regs.global_base_instance_index = 0; @@ -166,13 +152,7 @@ public: return; } - advanceCheck(); - if (check_limit) { - maxwell3d.RefreshParameters(); - minimum_limit = std::max(parameters[3], minimum_limit); - } const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); - const u32 base_size = std::max(minimum_limit, estimate); const u32 element_base = parameters[4]; const u32 base_instance = parameters[5]; maxwell3d.regs.vertex_id_base = element_base; @@ -191,7 +171,7 @@ public: params.max_draw_counts = 1; params.stride = 0; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); maxwell3d.engine_state = Maxwell::EngineHint::None; maxwell3d.replace_table.clear(); maxwell3d.regs.vertex_id_base = 0x0; @@ -223,8 +203,6 @@ private: maxwell3d.engine_state = Maxwell::EngineHint::None; maxwell3d.replace_table.clear(); } - - u32 minimum_limit{1 << 18}; }; class HLE_MultiLayerClear final : public HLEMacroImpl { @@ -257,10 +235,6 @@ public: return; } - advanceCheck(); - if (check_limit) { - maxwell3d.RefreshParameters(); - } const u32 start_indirect = parameters[0]; const u32 end_indirect = parameters[1]; if (start_indirect >= end_indirect) { @@ -274,20 +248,7 @@ public: const u32 indirect_words = 5 + padding; const u32 stride = indirect_words * sizeof(u32); const std::size_t draw_count = end_indirect - start_indirect; - u32 lowest_first = std::numeric_limits::max(); - u32 highest_limit = std::numeric_limits::min(); - for (std::size_t index = 0; index < draw_count; index++) { - const std::size_t base = index * indirect_words + 5; - const u32 count = parameters[base]; - const u32 first_index = parameters[base + 2]; - lowest_first = std::min(lowest_first, first_index); - highest_limit = std::max(highest_limit, first_index + count); - } - if (check_limit) { - minimum_limit = std::max(highest_limit, minimum_limit); - } const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); - const u32 base_size = std::max(minimum_limit, estimate); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; auto& params = maxwell3d.draw_manager->GetIndirectParams(); params.is_indexed = true; @@ -301,7 +262,7 @@ public: maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); maxwell3d.engine_state = Maxwell::EngineHint::None; maxwell3d.replace_table.clear(); } @@ -323,7 +284,6 @@ private: return; } const auto topology = static_cast(parameters[2]); - maxwell3d.regs.draw.topology.Assign(topology); const u32 padding = parameters[3]; const std::size_t max_draws = parameters[4]; @@ -345,8 +305,6 @@ private: base_vertex, base_instance, parameters[base + 1]); } } - - u32 minimum_limit{1 << 12}; }; class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { @@ -431,53 +389,53 @@ public: HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { builders.emplace(0x771BB18C62444DA0ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0x0D61FC9FAAC9FCADULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0x8A4D173EB99A8603ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d, true); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__, true); })); builders.emplace(0x0217920100488FF7ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0x3F5E74B9C9A50164ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0xEAD26C3E2109B06BULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0xC713C83D8F63CCF3ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0xD7333D26E0A93EDEULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0xEB29B2A09AA06D38ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0xDB1341DBEB4C8AF7ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d119c2b66..be4f76c18 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -93,8 +93,7 @@ public: void SignalReference() override; void ReleaseFences() override; void FlushAndInvalidateRegion( - VAddr addr, u64 size, - VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void WaitForIdle() override; void FragmentBarrier() override; void TiledCacheBarrier() override; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index f47406347..98ea20b42 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -164,7 +164,8 @@ struct FixedPipelineState { }; void Refresh(const Maxwell& regs); - void Refresh2(const Maxwell& regs, Maxwell::PrimitiveTopology topology, bool base_feautures_supported); + void Refresh2(const Maxwell& regs, Maxwell::PrimitiveTopology topology, + bool base_feautures_supported); void Refresh3(const Maxwell& regs); Maxwell::ComparisonOp DepthTestFunc() const noexcept { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index c69ebe396..d11383bf1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -227,7 +227,8 @@ struct DefaultSpec { ConfigureFuncPtr ConfigureFunc(const std::array& modules, const std::array& infos) { - return FindSpec(modules, infos); + return FindSpec(modules, infos); } } // Anonymous namespace @@ -505,11 +506,9 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, if (bind_pipeline) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); } - if (is_rescaling) { - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, - RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), - rescaling_data.data()); - } + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, + RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), + rescaling_data.data()); if (update_rescaling) { const f32 config_down_factor{Settings::values.resolution_info.down_factor}; const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c06182807..c661e5b19 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -89,8 +89,7 @@ public: void SignalReference() override; void ReleaseFences() override; void FlushAndInvalidateRegion( - VAddr addr, u64 size, - VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void WaitForIdle() override; void FragmentBarrier() override; void TiledCacheBarrier() override; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 2906d92a4..4fd15f11a 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -1,6 +1,5 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 780f5dede..ea62edb13 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -587,11 +587,16 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR dynamic_state_3 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT, .pNext = nullptr, - .extendedDynamicState3DepthClampEnable = ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, - .extendedDynamicState3LogicOpEnable = ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorBlendEnable = ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorBlendEquation = ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorWriteMask = ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, + .extendedDynamicState3DepthClampEnable = + ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, + .extendedDynamicState3LogicOpEnable = + ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, + .extendedDynamicState3ColorBlendEnable = + ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, + .extendedDynamicState3ColorBlendEquation = + ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, + .extendedDynamicState3ColorWriteMask = + ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, }; SetNext(next, dynamic_state_3); } else { @@ -1342,14 +1347,17 @@ std::vector Device::LoadExtensions(bool requires_surface) { features.pNext = &extended_dynamic_state_3; physical.GetFeatures2(features); - ext_extended_dynamic_state_3_blend = extended_dynamic_state_3.extendedDynamicState3ColorBlendEnable && + ext_extended_dynamic_state_3_blend = + extended_dynamic_state_3.extendedDynamicState3ColorBlendEnable && extended_dynamic_state_3.extendedDynamicState3ColorBlendEquation && extended_dynamic_state_3.extendedDynamicState3ColorWriteMask; - ext_extended_dynamic_state_3_enables = extended_dynamic_state_3.extendedDynamicState3DepthClampEnable && + ext_extended_dynamic_state_3_enables = + extended_dynamic_state_3.extendedDynamicState3DepthClampEnable && extended_dynamic_state_3.extendedDynamicState3LogicOpEnable; - ext_extended_dynamic_state_3 = ext_extended_dynamic_state_3_blend || ext_extended_dynamic_state_3_enables; + ext_extended_dynamic_state_3 = + ext_extended_dynamic_state_3_blend || ext_extended_dynamic_state_3_enables; if (ext_extended_dynamic_state_3) { extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index b58ec736f..920a8f4e3 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -486,33 +486,33 @@ private: bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. bool ext_depth_clip_control{}; ///< Support for VK_EXT_depth_clip_control bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. - bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. - bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. - bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. - bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. - bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. - bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_extended_dynamic_state_2{}; ///< Support for VK_EXT_extended_dynamic_state2. + bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. + bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. + bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. + bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. + bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_extended_dynamic_state_2{}; ///< Support for VK_EXT_extended_dynamic_state2. bool ext_extended_dynamic_state_2_extra{}; ///< Support for VK_EXT_extended_dynamic_state2. - bool ext_extended_dynamic_state_3{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_extended_dynamic_state_3_blend{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_extended_dynamic_state_3_enables{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. - bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. - bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. - bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. - bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. - bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. - bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget. - bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. - bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - bool supports_d24_depth{}; ///< Supports D24 depth buffers. - bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. - bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. - u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline - u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline + bool ext_extended_dynamic_state_3{}; ///< Support for VK_EXT_extended_dynamic_state3. + bool ext_extended_dynamic_state_3_blend{}; ///< Support for VK_EXT_extended_dynamic_state3. + bool ext_extended_dynamic_state_3_enables{}; ///< Support for VK_EXT_extended_dynamic_state3. + bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. + bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. + bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. + bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. + bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. + bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget. + bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached + bool supports_d24_depth{}; ///< Supports D24 depth buffers. + bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. + bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. + u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline + u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline // Telemetry parameters std::string vendor_name; ///< Device's driver name. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index d7ae14478..accfad8c1 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -1269,7 +1269,8 @@ public: dld->vkCmdSetColorBlendEnableEXT(handle, first, enables.size(), enables.data()); } - void SetColorBlendEquationEXT(u32 first, Span equations) const noexcept { + void SetColorBlendEquationEXT(u32 first, + Span equations) const noexcept { dld->vkCmdSetColorBlendEquationEXT(handle, first, equations.size(), equations.data()); } -- cgit v1.2.3