From bdc01254a9b3ce8359f8f007c2102cb2d112418e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 4 Aug 2023 03:31:52 +0200 Subject: Query Cache: Setup Base rework --- src/video_core/query_cache/bank_base.h | 106 +++++ src/video_core/query_cache/query_base.h | 72 ++++ src/video_core/query_cache/query_cache.h | 543 ++++++++++++++++++++++++++ src/video_core/query_cache/query_cache_base.h | 181 +++++++++ src/video_core/query_cache/query_stream.h | 125 ++++++ src/video_core/query_cache/types.h | 74 ++++ 6 files changed, 1101 insertions(+) create mode 100644 src/video_core/query_cache/bank_base.h create mode 100644 src/video_core/query_cache/query_base.h create mode 100644 src/video_core/query_cache/query_cache.h create mode 100644 src/video_core/query_cache/query_cache_base.h create mode 100644 src/video_core/query_cache/query_stream.h create mode 100644 src/video_core/query_cache/types.h (limited to 'src/video_core/query_cache') diff --git a/src/video_core/query_cache/bank_base.h b/src/video_core/query_cache/bank_base.h new file mode 100644 index 000000000..4246a609d --- /dev/null +++ b/src/video_core/query_cache/bank_base.h @@ -0,0 +1,106 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include +#include + + +#include "common/common_types.h" + +namespace VideoCommon { + +class BankBase { +protected: + const size_t base_bank_size; + size_t bank_size; + std::atomic references; + size_t current_slot; + +public: + BankBase(size_t bank_size_) + : base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {} + + virtual ~BankBase() = default; + + virtual std::pair Reserve() { + if (IsClosed()) { + return {false, bank_size}; + } + const size_t result = current_slot++; + return {true, result}; + } + + virtual void Reset() { + current_slot = 0; + references = 0; + bank_size = base_bank_size; + } + + size_t Size() const { + return bank_size; + } + + void 
AddReference(size_t how_many = 1) { + references.fetch_add(how_many, std::memory_order_relaxed); + } + + void CloseReference(size_t how_many = 1) { + if (how_many > references.load(std::memory_order_relaxed)) { + UNREACHABLE(); + } + references.fetch_sub(how_many, std::memory_order_relaxed); + } + + void Close() { + bank_size = current_slot; + } + + constexpr bool IsClosed() { + return current_slot >= bank_size; + } + + bool IsDead() { + return IsClosed() && references == 0; + } +}; + +template +class BankPool { +private: + std::deque bank_pool; + std::deque bank_indices; + +public: + BankPool() = default; + ~BankPool() = default; + + // Reserve a bank from the pool and return its index + template + size_t ReserveBank(Func&& builder) { + if (!bank_indices.empty() && bank_pool[bank_indices.front()].IsDead()) { + size_t new_index = bank_indices.front(); + bank_indices.pop_front(); + bank_pool[new_index].Reset(); + return new_index; + } + size_t new_index = bank_pool.size(); + builder(bank_pool, new_index); + bank_indices.push_back(new_index); + return new_index; + } + + // Get a reference to a bank using its index + BankType& GetBank(size_t index) { + return bank_pool[index]; + } + + // Get the total number of banks in the pool + size_t BankCount() const { + return bank_pool.size(); + } +}; + +} // namespace VideoCommon diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h new file mode 100644 index 000000000..485ed669c --- /dev/null +++ b/src/video_core/query_cache/query_base.h @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace VideoCommon { + +enum class QueryFlagBits : u32 { + HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp. 
+ IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host + IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host + IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. + IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query + IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query + IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. + IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. + IsFence = 1 << 8, ///< Indicates the query is a fence. +}; +DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) + +class QueryBase { +public: + VAddr guest_address; + QueryFlagBits flags; + u64 value; + +protected: + // Default constructor + QueryBase() : guest_address(0), flags{}, value{} {} + + // Parameterized constructor + QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) + : guest_address(address), flags(flags_), value{value_} {} +}; + +class GuestQuery : public QueryBase { +public: + // Parameterized constructor + GuestQuery(bool isLong, VAddr address, u64 queryValue) + : QueryBase(address, QueryFlagBits::IsFinalValueSynced, queryValue) { + if (isLong) { + flags |= QueryFlagBits::HasTimestamp; + } + } +}; + +class HostQueryBase : public QueryBase { +public: + // Default constructor + HostQueryBase() + : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{}, + size_banks{}, start_slot{}, size_slots{} {} + + // Parameterized constructor + HostQueryBase(bool isLong, VAddr address) + : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{}, + start_slot{}, size_slots{} { + if (isLong) { + flags |= QueryFlagBits::HasTimestamp; + } + } + + u32 start_bank_id; + u32 size_banks; + size_t start_slot; + size_t size_slots; +}; + +} // namespace VideoCommon \ No newline at end of file diff --git a/src/video_core/query_cache/query_cache.h 
b/src/video_core/query_cache/query_cache.h new file mode 100644 index 000000000..f6af48d14 --- /dev/null +++ b/src/video_core/query_cache/query_cache.h @@ -0,0 +1,543 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "common/scope_exit.h" +#include "common/settings.h" +#include "core/memory.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/query_cache/bank_base.h" +#include "video_core/query_cache/query_base.h" +#include "video_core/query_cache/query_cache_base.h" +#include "video_core/query_cache/query_stream.h" +#include "video_core/query_cache/types.h" + +namespace VideoCommon { + +using Maxwell = Tegra::Engines::Maxwell3D; + +struct SyncValuesStruct { + VAddr address; + u64 value; + u64 size; + + static constexpr bool GeneratesBaseBuffer = true; +}; + +template +class GuestStreamer : public SimpleStreamer { +public: + using RuntimeType = typename Traits::RuntimeType; + + GuestStreamer(size_t id_, RuntimeType& runtime_) + : SimpleStreamer(id_), runtime{runtime_} {} + + virtual ~GuestStreamer() = default; + + size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, + std::optional subreport = std::nullopt) override { + auto new_id = BuildQuery(has_timestamp, address, static_cast(value)); + pending_sync.push_back(new_id); + return new_id; + } + + bool HasPendingSync() override { + return !pending_sync.empty(); + } + + void SyncWrites() override { + if (pending_sync.empty()) { + return; + } + std::vector sync_values; + sync_values.reserve(pending_sync.size()); + for (size_t pending_id : pending_sync) { + auto& query = slot_queries[pending_id]; + if (True(query.flags & QueryFlagBits::IsRewritten) || + True(query.flags & 
QueryFlagBits::IsInvalidated)) { + continue; + } + query.flags |= QueryFlagBits::IsHostSynced; + sync_values.emplace_back(query.guest_address, query.value, + True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4); + } + pending_sync.clear(); + if (sync_values.size() > 0) { + runtime.template SyncValues(sync_values); + } + } + +private: + RuntimeType& runtime; + std::deque pending_sync; +}; + +template +class StubStreamer : public GuestStreamer { +public: + using RuntimeType = typename Traits::RuntimeType; + + StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer(id_, runtime_) {} + + ~StubStreamer() override = default; + + size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, + std::optional subreport = std::nullopt) override { + size_t new_id = GuestStreamer::WriteCounter(address, has_timestamp, 1U, subreport); + return new_id; + } +}; + +template +struct QueryCacheBase::QueryCacheBaseImpl { + using RuntimeType = typename Traits::RuntimeType; + + QueryCacheBaseImpl(QueryCacheBase* owner_, VideoCore::RasterizerInterface& rasterizer_, + Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) + : owner{owner_}, rasterizer{rasterizer_}, + cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { + streamer_mask = 0; + for (size_t i = 0; i < static_cast(QueryType::MaxQueryTypes); i++) { + streamers[i] = runtime.GetStreamerInterface(static_cast(i)); + if (streamers[i]) { + streamer_mask |= 1ULL << i; + } + } + } + + template + void ForEachStreamerIn(u64 mask, Func&& func) { + static constexpr bool RETURNS_BOOL = + std::is_same_v, bool>; + while (mask != 0) { + size_t position = std::countr_zero(mask); + mask &= ~(1ULL << position); + if constexpr (RETURNS_BOOL) { + if (func(streamers[position])) { + return; + } + } else { + func(streamers[position]); + } + } + } + + template + void ForEachStreamer(Func&& func) { + ForEachStreamerIn(streamer_mask, func); + } + + QueryBase* 
ObtainQuery(QueryCacheBase::QueryLocation location) { + size_t which_stream = location.stream_id.Value(); + auto* streamer = streamers[which_stream]; + if (!streamer) { + return nullptr; + } + return streamer->GetQuery(location.query_id.Value()); + } + + QueryCacheBase* owner; + VideoCore::RasterizerInterface& rasterizer; + Core::Memory::Memory& cpu_memory; + Traits::RuntimeType& runtime; + Tegra::GPU& gpu; + std::array(QueryType::MaxQueryTypes)> streamers; + u64 streamer_mask; + std::mutex flush_guard; + std::deque flushes_pending; + std::vector::QueryLocation> pending_unregister; +}; + +template +QueryCacheBase::QueryCacheBase(Tegra::GPU& gpu_, + VideoCore::RasterizerInterface& rasterizer_, + Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) + : cached_queries{} { + impl = std::make_unique::QueryCacheBaseImpl>( + this, rasterizer_, cpu_memory_, runtime_, gpu_); +} + +template +QueryCacheBase::~QueryCacheBase() = default; + +template +void QueryCacheBase::CounterEnable(QueryType counter_type, bool is_enabled) { + size_t index = static_cast(counter_type); + StreamerInterface* streamer = impl->streamers[index]; + if (!streamer) [[unlikely]] { + UNREACHABLE(); + return; + } + if (is_enabled) { + streamer->StartCounter(); + } else { + streamer->PauseCounter(); + } +} + +template +void QueryCacheBase::CounterClose(QueryType counter_type) { + size_t index = static_cast(counter_type); + StreamerInterface* streamer = impl->streamers[index]; + if (!streamer) [[unlikely]] { + UNREACHABLE(); + return; + } + streamer->CloseCounter(); +} + +template +void QueryCacheBase::CounterReset(QueryType counter_type) { + size_t index = static_cast(counter_type); + StreamerInterface* streamer = impl->streamers[index]; + if (!streamer) [[unlikely]] { + UNIMPLEMENTED(); + return; + } + streamer->ResetCounter(); +} + +template +void QueryCacheBase::BindToChannel(s32 id) { + VideoCommon::ChannelSetupCaches::BindToChannel(id); + impl->runtime.Bind3DEngine(maxwell3d); +} + +template 
+void QueryCacheBase::CounterReport(GPUVAddr addr, QueryType counter_type, + QueryPropertiesFlags flags, u32 payload, u32 subreport) { + const bool has_timestamp = True(flags & QueryPropertiesFlags::HasTimeout); + const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); + size_t streamer_id = static_cast(counter_type); + auto* streamer = impl->streamers[streamer_id]; + if (!streamer) [[unlikely]] { + if (has_timestamp) { + u64 timestamp = impl->gpu.GetTicks(); + gpu_memory->Write(addr + 8, timestamp); + gpu_memory->Write(addr, 1ULL); + } else { + gpu_memory->Write(addr, 1U); + } + return; + } + auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); + if (!cpu_addr_opt) [[unlikely]] { + return; + } + VAddr cpu_addr = *cpu_addr_opt; + const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); + auto* query = streamer->GetQuery(new_query_id); + if (is_fence) { + query->flags |= QueryFlagBits::IsFence; + } + QueryLocation query_location{}; + query_location.stream_id.Assign(static_cast(streamer_id)); + query_location.query_id.Assign(static_cast(new_query_id)); + const auto gen_caching_indexing = [](VAddr cur_addr) { + return std::make_pair(cur_addr >> Core::Memory::YUZU_PAGEBITS, + static_cast(cur_addr & Core::Memory::YUZU_PAGEMASK)); + }; + u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); + u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); + bool is_synced = !Settings::IsGPULevelHigh() && is_fence; + std::function operation( + [this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] { + if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { + if (!is_synced) [[likely]] { + impl->pending_unregister.push_back(query_location); + } + return; + } + if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { + UNREACHABLE(); + return; + } + if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { + u64 timestamp = impl->gpu.GetTicks(); + 
std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp)); + std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); + } else { + u32 value = static_cast(query_base->value); + std::memcpy(pointer, &value, sizeof(value)); + } + if (!is_synced) [[likely]] { + impl->pending_unregister.push_back(query_location); + } + }); + if (is_fence) { + impl->rasterizer.SignalFence(std::move(operation)); + } else { + impl->rasterizer.SyncOperation(std::move(operation)); + } + if (is_synced) { + streamer->Free(new_query_id); + return; + } + auto [cont_addr, base] = gen_caching_indexing(cpu_addr); + { + std::scoped_lock lock(cache_mutex); + auto it1 = cached_queries.try_emplace(cont_addr); + auto& sub_container = it1.first->second; + auto it_current = sub_container.find(base); + if (it_current == sub_container.end()) { + sub_container.insert_or_assign(base, query_location); + return; + } + auto* old_query = impl->ObtainQuery(it_current->second); + old_query->flags |= QueryFlagBits::IsRewritten; + sub_container.insert_or_assign(base, query_location); + } +} + +template +void QueryCacheBase::UnregisterPending() { + const auto gen_caching_indexing = [](VAddr cur_addr) { + return std::make_pair(cur_addr >> Core::Memory::YUZU_PAGEBITS, + static_cast(cur_addr & Core::Memory::YUZU_PAGEMASK)); + }; + std::scoped_lock lock(cache_mutex); + for (QueryLocation loc : impl->pending_unregister) { + const auto [streamer_id, query_id] = loc.unpack(); + auto* streamer = impl->streamers[streamer_id]; + if (!streamer) [[unlikely]] { + continue; + } + auto* query = streamer->GetQuery(query_id); + auto [cont_addr, base] = gen_caching_indexing(query->guest_address); + auto it1 = cached_queries.find(cont_addr); + if (it1 != cached_queries.end()) { + auto it2 = it1->second.find(base); + if (it2 != it1->second.end()) { + if (it2->second.raw == loc.raw) { + it1->second.erase(it2); + } + } + } + streamer->Free(query_id); + } + impl->pending_unregister.clear(); +} + +template +void 
QueryCacheBase::NotifyWFI() { + bool should_sync = false; + impl->ForEachStreamer( + [&should_sync](StreamerInterface* streamer) { should_sync |= streamer->HasPendingSync(); }); + if (!should_sync) { + return; + } + + impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->PresyncWrites(); }); + impl->runtime.Barriers(true); + impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->SyncWrites(); }); + impl->runtime.Barriers(false); +} + +template +void QueryCacheBase::NotifySegment(bool resume) { + if (resume) { + impl->runtime.ResumeHostConditionalRendering(); + } else { + impl->runtime.PauseHostConditionalRendering(); + CounterClose(VideoCommon::QueryType::ZPassPixelCount64); + CounterClose(VideoCommon::QueryType::StreamingByteCount); + } +} + +template +bool QueryCacheBase::AccelerateHostConditionalRendering() { + bool qc_dirty = false; + const auto gen_lookup = [this, &qc_dirty](GPUVAddr address) -> VideoCommon::LookupData { + auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(address); + if (!cpu_addr_opt) [[unlikely]] { + return VideoCommon::LookupData{ + .address = 0, + .found_query = nullptr, + }; + } + VAddr cpu_addr = *cpu_addr_opt; + std::scoped_lock lock(cache_mutex); + auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS); + if (it1 == cached_queries.end()) { + return VideoCommon::LookupData{ + .address = cpu_addr, + .found_query = nullptr, + }; + } + auto& sub_container = it1->second; + auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK); + + if (it_current == sub_container.end()) { + auto it_current_2 = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4); + if (it_current_2 == sub_container.end()) { + return VideoCommon::LookupData{ + .address = cpu_addr, + .found_query = nullptr, + }; + } + } + auto* query = impl->ObtainQuery(it_current->second); + qc_dirty |= True(query->flags & QueryFlagBits::IsHostManaged) && + False(query->flags & QueryFlagBits::IsGuestSynced); + return 
VideoCommon::LookupData{ + .address = cpu_addr, + .found_query = query, + }; + }; + + auto& regs = maxwell3d->regs; + if (regs.render_enable_override != Maxwell::Regs::RenderEnable::Override::UseRenderEnable) { + impl->runtime.EndHostConditionalRendering(); + return false; + } + /*if (!Settings::IsGPULevelHigh()) { + impl->runtime.EndHostConditionalRendering(); + return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24, + VideoCommon::CacheType::BufferCache | + VideoCommon::CacheType::QueryCache); + }*/ + const ComparisonMode mode = static_cast(regs.render_enable.mode); + const GPUVAddr address = regs.render_enable.Address(); + switch (mode) { + case ComparisonMode::True: + impl->runtime.EndHostConditionalRendering(); + return false; + case ComparisonMode::False: + impl->runtime.EndHostConditionalRendering(); + return false; + case ComparisonMode::Conditional: { + VideoCommon::LookupData object_1{gen_lookup(address)}; + return impl->runtime.HostConditionalRenderingCompareValue(object_1, qc_dirty); + } + case ComparisonMode::IfEqual: { + VideoCommon::LookupData object_1{gen_lookup(address)}; + VideoCommon::LookupData object_2{gen_lookup(address + 16)}; + return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, + true); + } + case ComparisonMode::IfNotEqual: { + VideoCommon::LookupData object_1{gen_lookup(address)}; + VideoCommon::LookupData object_2{gen_lookup(address + 16)}; + return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, + false); + } + default: + return false; + } +} + +// Async downloads +template +void QueryCacheBase::CommitAsyncFlushes() { + u64 mask{}; + { + std::scoped_lock lk(impl->flush_guard); + impl->ForEachStreamer([&mask](StreamerInterface* streamer) { + bool local_result = streamer->HasUnsyncedQueries(); + if (local_result) { + mask |= 1ULL << streamer->GetId(); + } + }); + impl->flushes_pending.push_back(mask); + } + std::function func([this] { UnregisterPending(); 
}); + impl->rasterizer.SyncOperation(std::move(func)); + if (mask == 0) { + return; + } + impl->ForEachStreamerIn(mask, + [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); }); +} + +template +bool QueryCacheBase::HasUncommittedFlushes() const { + bool result = false; + impl->ForEachStreamer([&result](StreamerInterface* streamer) { + result |= streamer->HasUnsyncedQueries(); + return result; + }); + return result; +} + +template +bool QueryCacheBase::ShouldWaitAsyncFlushes() { + std::scoped_lock lk(impl->flush_guard); + return !impl->flushes_pending.empty() && impl->flushes_pending.front() != 0ULL; +} + +template +void QueryCacheBase::PopAsyncFlushes() { + u64 mask; + { + std::scoped_lock lk(impl->flush_guard); + mask = impl->flushes_pending.front(); + impl->flushes_pending.pop_front(); + } + if (mask == 0) { + return; + } + impl->ForEachStreamerIn(mask, + [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); }); +} + +// Invalidation + +template +void QueryCacheBase::InvalidateQuery(QueryCacheBase::QueryLocation location) { + auto* query_base = impl->ObtainQuery(location); + if (!query_base) { + return; + } + query_base->flags |= QueryFlagBits::IsInvalidated; +} + +template +bool QueryCacheBase::IsQueryDirty(QueryCacheBase::QueryLocation location) { + auto* query_base = impl->ObtainQuery(location); + if (!query_base) { + return false; + } + return True(query_base->flags & QueryFlagBits::IsHostManaged) && + False(query_base->flags & QueryFlagBits::IsGuestSynced); +} + +template +bool QueryCacheBase::SemiFlushQueryDirty(QueryCacheBase::QueryLocation location) { + auto* query_base = impl->ObtainQuery(location); + if (!query_base) { + return false; + } + if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && + False(query_base->flags & QueryFlagBits::IsGuestSynced)) { + auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); + if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { + std::memcpy(ptr, 
&query_base->value, sizeof(query_base->value)); + return false; + } + u32 value_l = static_cast(query_base->value); + std::memcpy(ptr, &value_l, sizeof(value_l)); + return false; + } + return True(query_base->flags & QueryFlagBits::IsHostManaged) && + False(query_base->flags & QueryFlagBits::IsGuestSynced); +} + +template +void QueryCacheBase::RequestGuestHostSync() { + impl->rasterizer.ReleaseFences(); +} + +} // namespace VideoCommon diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h new file mode 100644 index 000000000..55f508dd1 --- /dev/null +++ b/src/video_core/query_cache/query_cache_base.h @@ -0,0 +1,181 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/bit_field.h" +#include "common/common_types.h" +#include "core/memory.h" +#include "video_core/control/channel_state_cache.h" +#include "video_core/query_cache/query_base.h" +#include "video_core/query_cache/types.h" + +namespace Core::Memory { +class Memory; +} + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Tegra { +class GPU; +} + +namespace VideoCommon { + +struct LookupData { + VAddr address; + QueryBase* found_query; +}; + +template +class QueryCacheBase : public VideoCommon::ChannelSetupCaches { + using RuntimeType = typename Traits::RuntimeType; + +public: + union QueryLocation { + BitField<27, 5, u32> stream_id; + BitField<0, 27, u32> query_id; + u32 raw; + + std::pair unpack() { + return {static_cast(stream_id.Value()), static_cast(query_id.Value())}; + } + }; + + explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, + Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); + + ~QueryCacheBase(); + + void InvalidateRegion(VAddr addr, std::size_t size) { + IterateCache(addr, size, + [this](QueryLocation 
location) { InvalidateQuery(location); }); + } + + void FlushRegion(VAddr addr, std::size_t size) { + bool result = false; + IterateCache(addr, size, [this, &result](QueryLocation location) { + result |= SemiFlushQueryDirty(location); + return result; + }); + if (result) { + RequestGuestHostSync(); + } + } + + static u64 BuildMask(std::span types) { + u64 mask = 0; + for (auto query_type : types) { + mask |= 1ULL << (static_cast(query_type)); + } + return mask; + } + + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size) { + bool result = false; + IterateCache(addr, size, [this, &result](QueryLocation location) { + result |= IsQueryDirty(location); + return result; + }); + return result; + } + + void CounterEnable(QueryType counter_type, bool is_enabled); + + void CounterReset(QueryType counter_type); + + void CounterClose(QueryType counter_type); + + void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags, + u32 payload, u32 subreport); + + void NotifyWFI(); + + bool AccelerateHostConditionalRendering(); + + // Async downloads + void CommitAsyncFlushes(); + + bool HasUncommittedFlushes() const; + + bool ShouldWaitAsyncFlushes(); + + void PopAsyncFlushes(); + + void NotifySegment(bool resume); + + void BindToChannel(s32 id) override; + +protected: + template + void IterateCache(VAddr addr, std::size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = + std::is_same_v, bool>; + const u64 addr_begin = addr; + const u64 addr_end = addr_begin + size; + + const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS; + std::scoped_lock lock(cache_mutex); + for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) { + const u64 page_start = page << Core::Memory::YUZU_PAGEBITS; + const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) { + const u64 cache_begin = page_start + query_location; + const u64 
cache_end = cache_begin + sizeof(u32); + return cache_begin < addr_end && addr_begin < cache_end; + }; + const auto& it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + continue; + } + auto& contents = it->second; + for (auto& query : contents) { + if (!in_range(query.first)) { + continue; + } + if constexpr (RETURNS_BOOL) { + if (func(query.second)) { + return; + } + } else { + func(query.second); + } + } + if constexpr (remove_from_cache) { + const auto in_range2 = [&](const std::pair& pair) { + return in_range(pair.first); + }; + std::erase_if(contents, in_range2); + } + } + } + + using ContentCache = typename std::unordered_map>; + + void InvalidateQuery(QueryLocation location); + bool IsQueryDirty(QueryLocation location); + bool SemiFlushQueryDirty(QueryLocation location); + void RequestGuestHostSync(); + void UnregisterPending(); + + std::unordered_map> cached_queries; + std::mutex cache_mutex; + + struct QueryCacheBaseImpl; + friend struct QueryCacheBaseImpl; + friend RuntimeType; + + std::unique_ptr impl; +}; + +} // namespace VideoCommon \ No newline at end of file diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h new file mode 100644 index 000000000..dd5f95b3c --- /dev/null +++ b/src/video_core/query_cache/query_stream.h @@ -0,0 +1,125 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/query_cache/bank_base.h" +#include "video_core/query_cache/query_base.h" + +namespace VideoCommon { + +class StreamerInterface { +public: + StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {} + virtual ~StreamerInterface() = default; + + virtual QueryBase* GetQuery(size_t id) = 0; + + virtual void StartCounter() { + /* Do Nothing */ + } + + virtual void 
PauseCounter() { + /* Do Nothing */ + } + + virtual void ResetCounter() { + /* Do Nothing */ + } + + virtual void CloseCounter() { + /* Do Nothing */ + } + + virtual bool HasPendingSync() { + return false; + } + + virtual void PresyncWrites() { + /* Do Nothing */ + } + + virtual void SyncWrites() { + /* Do Nothing */ + } + + virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, + std::optional subreport = std::nullopt) = 0; + + virtual bool HasUnsyncedQueries() { + return false; + } + + virtual void PushUnsyncedQueries() { + /* Do Nothing */ + } + + virtual void PopUnsyncedQueries() { + /* Do Nothing */ + } + + virtual void Free(size_t query_id) = 0; + + size_t GetId() const { + return id; + } + +protected: + const size_t id; + const u64 dependance_mask; +}; + +template +class SimpleStreamer : public StreamerInterface { +public: + SimpleStreamer(size_t id_) : StreamerInterface{id_} {} + virtual ~SimpleStreamer() = default; + +protected: + virtual QueryType* GetQuery(size_t query_id) override { + if (query_id < slot_queries.size()) { + return &slot_queries[query_id]; + } + return nullptr; + } + + virtual void Free(size_t query_id) override { + std::scoped_lock lk(guard); + ReleaseQuery(query_id); + } + + template ()...))> + size_t BuildQuery(Args&&... 
args) { + std::scoped_lock lk(guard); + if (!old_queries.empty()) { + size_t new_id = old_queries.front(); + old_queries.pop_front(); + new (&slot_queries[new_id]) QueryType(std::forward(args)...); + return new_id; + } + size_t new_id = slot_queries.size(); + slot_queries.emplace_back(std::forward(args)...); + return new_id; + } + + void ReleaseQuery(size_t query_id) { + + if (query_id < slot_queries.size()) { + old_queries.push_back(query_id); + return; + } + UNREACHABLE(); + } + + std::mutex guard; + std::deque slot_queries; + std::deque old_queries; +}; + +} // namespace VideoCommon \ No newline at end of file diff --git a/src/video_core/query_cache/types.h b/src/video_core/query_cache/types.h new file mode 100644 index 000000000..e9226bbfc --- /dev/null +++ b/src/video_core/query_cache/types.h @@ -0,0 +1,74 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace VideoCommon { + +enum class QueryPropertiesFlags : u32 { + HasTimeout = 1 << 0, + IsAFence = 1 << 1, +}; +DECLARE_ENUM_FLAG_OPERATORS(QueryPropertiesFlags) + +// This should always be equivalent to maxwell3d Report Semaphore Reports +enum class QueryType : u32 { + Payload = 0, // "None" in docs, but confirmed via hardware to return the payload + VerticesGenerated = 1, + ZPassPixelCount = 2, + PrimitivesGenerated = 3, + AlphaBetaClocks = 4, + VertexShaderInvocations = 5, + StreamingPrimitivesNeededMinusSucceeded = 6, + GeometryShaderInvocations = 7, + GeometryShaderPrimitivesGenerated = 9, + ZCullStats0 = 10, + StreamingPrimitivesSucceeded = 11, + ZCullStats1 = 12, + StreamingPrimitivesNeeded = 13, + ZCullStats2 = 14, + ClipperInvocations = 15, + ZCullStats3 = 16, + ClipperPrimitivesGenerated = 17, + VtgPrimitivesOut = 18, + PixelShaderInvocations = 19, + ZPassPixelCount64 = 21, + IEEECleanColorTarget = 24, + IEEECleanZetaTarget = 25, + 
StreamingByteCount = 26, + TessellationInitInvocations = 27, + BoundingRectangle = 28, + TessellationShaderInvocations = 29, + TotalStreamingPrimitivesNeededMinusSucceeded = 30, + TessellationShaderPrimitivesGenerated = 31, + // max. + MaxQueryTypes, +}; + +// Comparison modes for Host Conditional Rendering +enum class ComparisonMode : u32 { + False = 0, + True = 1, + Conditional = 2, + IfEqual = 3, + IfNotEqual = 4, + MaxComparisonMode, +}; + +// Reduction ops. +enum class ReductionOp : u32 { + RedAdd = 0, + RedMin = 1, + RedMax = 2, + RedInc = 3, + RedDec = 4, + RedAnd = 5, + RedOr = 6, + RedXor = 7, + MaxReductionOp, +}; + +} // namespace VideoCommon \ No newline at end of file -- cgit v1.2.3 From aa6587d854e4953876b02ca71278a665bcae8179 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 4 Aug 2023 13:38:49 +0200 Subject: QueryCache: Implement dependant queries. --- src/video_core/query_cache/query_base.h | 1 + src/video_core/query_cache/query_cache.h | 18 ++- src/video_core/query_cache/query_stream.h | 6 +- src/video_core/renderer_vulkan/vk_query_cache.cpp | 160 +++++++++++++++++++++- 4 files changed, 180 insertions(+), 5 deletions(-) (limited to 'src/video_core/query_cache') diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h index 485ed669c..0ae23af9f 100644 --- a/src/video_core/query_cache/query_base.h +++ b/src/video_core/query_cache/query_base.h @@ -18,6 +18,7 @@ enum class QueryFlagBits : u32 { IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. IsFence = 1 << 8, ///< Indicates the query is a fence. 
+ IsQueuedForAsyncFlush = 1 <<9,///< Indicates that the query can be flushed at any moment }; DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index f6af48d14..f1393d5c7 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -489,8 +489,22 @@ void QueryCacheBase::PopAsyncFlushes() { if (mask == 0) { return; } - impl->ForEachStreamerIn(mask, - [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); }); + u64 ran_mask = 0; + u64 next_phase = 0; + while (mask) { + impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) { + u64 dep_mask = streamer->GetDependenceMask(); + if ((dep_mask & ~ran_mask) != 0) { + next_phase |= dep_mask; + return; + } + u64 index = streamer->GetId(); + ran_mask |= (1ULL << index); + mask &= ~(1ULL << index); + streamer->PopUnsyncedQueries(); + }); + ran_mask |= next_phase; + } } // Invalidation diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h index dd5f95b3c..0e9275565 100644 --- a/src/video_core/query_cache/query_stream.h +++ b/src/video_core/query_cache/query_stream.h @@ -70,6 +70,10 @@ public: return id; } + u64 GetDependenceMask() const { + return dependance_mask; + } + protected: const size_t id; const u64 dependance_mask; @@ -78,7 +82,7 @@ protected: template class SimpleStreamer : public StreamerInterface { public: - SimpleStreamer(size_t id_) : StreamerInterface{id_} {} + SimpleStreamer(size_t id_, u64 dependance_mask_ = 0) : StreamerInterface{id_, dependance_mask_} {} virtual ~SimpleStreamer() = default; protected: diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 42f571007..ef891e26b 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -525,6 +525,9 @@ private: 
vk::Buffer buffer; }; +template +class PrimitivesSucceededStreamer; + template class TFBCounterStreamer : public BaseStreamer { public: @@ -537,6 +540,7 @@ public: current_bank = nullptr; counter_buffers.fill(VK_NULL_HANDLE); offsets.fill(0); + last_queries.fill(0); const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -630,7 +634,7 @@ public: return index; } const size_t subreport = static_cast(*subreport_); - UpdateBuffers(); + last_queries[subreport] = address; if ((streams_mask & (1ULL << subreport)) == 0) { new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; return index; @@ -646,6 +650,13 @@ public: return index; } + std::optional GetLastQueryStream(size_t stream) { + if (last_queries[stream] != 0) { + return {last_queries[stream]}; + } + return std::nullopt; + } + bool HasUnsyncedQueries() override { return !pending_flush_queries.empty(); } @@ -657,6 +668,7 @@ public: size_t offset_base = staging_ref.offset; for (auto q : pending_flush_queries) { auto* query = GetQuery(q); + query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush; auto& bank = bank_pool.GetBank(query->start_bank_id); bank.Sync(staging_ref, offset_base, query->start_slot, 1); offset_base += TFBQueryBank::QUERY_SIZE; @@ -741,13 +753,15 @@ private: cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } else { - scheduler.Record([this, total = static_cast(buffers_count)](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, + total = static_cast(buffers_count)](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data()); }); } } void UpdateBuffers() { + last_queries.fill(0); runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { buffers_count = 0; for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; @@ -804,6 +818,9 @@ private: return {current_bank_id, slot}; } + template + friend class PrimitivesSucceededStreamer; + 
static constexpr size_t NUM_STREAMS = 4; static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL; @@ -833,9 +850,143 @@ private: size_t buffers_count{}; std::array counter_buffers{}; std::array offsets{}; + std::array last_queries; u64 streams_mask; }; +class PrimitivesQueryBase : public VideoCommon::QueryBase { +public: + // Default constructor + PrimitivesQueryBase() + : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, + dependant_index{}, dependant_manage{} {} + + // Parameterized constructor + PrimitivesQueryBase(bool is_long, VAddr address) + : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, + dependant_index{}, dependant_manage{} { + if (is_long) { + flags |= VideoCommon::QueryFlagBits::HasTimestamp; + } + } + + u64 stride; + VAddr dependant_address; + size_t dependant_index; + bool dependant_manage; +}; + +template +class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer { +public: + PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_, + TFBCounterStreamer& tfb_streamer_, Core::Memory::Memory& cpu_memory_) + : VideoCommon::SimpleStreamer( + id, 1ULL << static_cast(VideoCommon::QueryType::StreamingByteCount)), + runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {} + + size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, + std::optional subreport_) override { + auto index = BuildQuery(); + auto* new_query = GetQuery(index); + new_query->guest_address = address; + new_query->value = 0; + if (has_timestamp) { + new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; + } + if (!subreport_) { + new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; + return index; + } + const size_t subreport = static_cast(*subreport_); + auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport); + bool must_manage_dependance = false; + if (dependant_address_opt) { + new_query->dependant_address = 
*dependant_address_opt; + } else { + new_query->dependant_index = + tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_); + auto* dependant_query = tfb_streamer.GetQuery(new_query->dependant_index); + dependant_query->flags |= VideoCommon::QueryFlagBits::IsInvalidated; + must_manage_dependance = true; + if (True(dependant_query->flags & VideoCommon::QueryFlagBits::IsFinalValueSynced)) { + new_query->value = 0; + new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; + if (must_manage_dependance) { + tfb_streamer.Free(new_query->dependant_index); + } + return index; + } + } + + new_query->dependant_manage = must_manage_dependance; + runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) { + for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; + i++) { + const auto& tf = regs.transform_feedback; + if (tf.controls[i].stream != subreport) { + continue; + } + new_query->stride = tf.controls[i].stride; + break; + } + }); + pending_flush_queries.push_back(index); + return index; + } + + bool HasUnsyncedQueries() override { + return !pending_flush_queries.empty(); + } + + void PushUnsyncedQueries() override { + std::scoped_lock lk(flush_guard); + pending_flush_sets.emplace_back(std::move(pending_flush_queries)); + pending_flush_queries.clear(); + } + + void PopUnsyncedQueries() override { + std::vector flushed_queries; + { + std::scoped_lock lk(flush_guard); + flushed_queries = std::move(pending_flush_sets.front()); + pending_flush_sets.pop_front(); + } + + for (auto q : flushed_queries) { + auto* query = GetQuery(q); + if (True(query->flags & VideoCommon::QueryFlagBits::IsFinalValueSynced)) { + continue; + } + + query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; + if (query->dependant_manage) { + auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index); + query->value = dependant_query->value / query->stride; + tfb_streamer.Free(query->dependant_index); + } 
else { + u8* pointer = cpu_memory.GetPointer(query->dependant_address); + u32 result; + std::memcpy(&result, pointer, sizeof(u32)); + query->value = static_cast(result) / query->stride; + } + } + } + +private: + QueryCacheRuntime& runtime; + TFBCounterStreamer& tfb_streamer; + Core::Memory::Memory& cpu_memory; + + // syncing queue + std::vector pending_sync; + + // flush levels + std::vector pending_flush_queries; + std::deque> pending_flush_sets; + std::mutex flush_guard; +}; + } // namespace struct QueryCacheRuntimeImpl { @@ -853,6 +1004,8 @@ struct QueryCacheRuntimeImpl { scheduler, memory_allocator), tfb_streamer(static_cast(QueryType::StreamingByteCount), runtime, device, scheduler, memory_allocator, staging_pool), + primitives_succeeded_streamer( + static_cast(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_), hcr_setup{}, hcr_is_set{}, is_hcr_running{} { hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; @@ -889,6 +1042,7 @@ struct QueryCacheRuntimeImpl { VideoCommon::GuestStreamer guest_streamer; SamplesStreamer sample_streamer; TFBCounterStreamer tfb_streamer; + PrimitivesSucceededStreamer primitives_succeeded_streamer; std::vector> little_cache; std::vector> buffers_to_upload_to; @@ -1086,6 +1240,8 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp return &impl->sample_streamer; case QueryType::StreamingByteCount: return &impl->tfb_streamer; + case QueryType::StreamingPrimitivesSucceeded: + return &impl->primitives_succeeded_streamer; default: return nullptr; } -- cgit v1.2.3 From 282ae8fa51e060e6d4ef026b734aa871b1b9331e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 6 Aug 2023 09:38:16 +0200 Subject: Query Cache: address issues --- src/video_core/buffer_cache/buffer_cache.h | 5 +- src/video_core/buffer_cache/buffer_cache_base.h | 4 +- src/video_core/engines/maxwell_3d.cpp | 6 - src/video_core/engines/puller.cpp | 6 +- src/video_core/fence_manager.h | 14 +- 
src/video_core/query_cache/bank_base.h | 16 +- src/video_core/query_cache/query_base.h | 44 +++-- src/video_core/query_cache/query_cache.h | 66 ++++--- src/video_core/query_cache/query_cache_base.h | 8 +- src/video_core/query_cache/query_stream.h | 22 ++- src/video_core/rasterizer_interface.h | 5 +- src/video_core/renderer_null/null_rasterizer.h | 3 +- src/video_core/renderer_opengl/gl_rasterizer.cpp | 24 ++- src/video_core/renderer_opengl/gl_rasterizer.h | 3 +- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 6 +- src/video_core/renderer_vulkan/vk_fence_manager.h | 2 +- src/video_core/renderer_vulkan/vk_query_cache.cpp | 203 +++++++++++++-------- src/video_core/renderer_vulkan/vk_query_cache.h | 5 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 27 --- src/video_core/renderer_vulkan/vk_rasterizer.h | 3 +- src/video_core/renderer_vulkan/vk_scheduler.h | 12 +- 21 files changed, 270 insertions(+), 214 deletions(-) (limited to 'src/video_core/query_cache') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index f91b7d1e4..9e90c587c 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -276,9 +276,8 @@ std::pair BufferCache

::ObtainBuffer(GPUVAddr gpu_ad } template -std::pair BufferCache

::ObtainCPUBuffer(VAddr cpu_addr, u32 size, - ObtainBufferSynchronize sync_info, - ObtainBufferOperation post_op) { +std::pair BufferCache

::ObtainCPUBuffer( + VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) { const BufferId buffer_id = FindBuffer(cpu_addr, size); Buffer& buffer = slot_buffers[buffer_id]; diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 9507071e5..c4f6e8d12 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -297,8 +297,8 @@ public: ObtainBufferOperation post_op); [[nodiscard]] std::pair ObtainCPUBuffer(VAddr gpu_addr, u32 size, - ObtainBufferSynchronize sync_info, - ObtainBufferOperation post_op); + ObtainBufferSynchronize sync_info, + ObtainBufferOperation post_op); void FlushCachedWrites(); /// Return true when there are uncommitted buffers to be downloaded diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 922c399e6..46b9c548a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -596,12 +596,6 @@ void Maxwell3D::ProcessCounterReset() { case Regs::ClearReport::ZPassPixelCount: rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); break; - case Regs::ClearReport::PrimitivesGenerated: - rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount); - break; - case Regs::ClearReport::VtgPrimitivesOut: - rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount); - break; default: LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); break; diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 582738234..8dd34c04a 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -82,7 +82,8 @@ void Puller::ProcessSemaphoreTriggerMethod() { if (op == GpuSemaphoreOperation::WriteLong) { const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = 
regs.semaphore_sequence; - rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); + rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, + VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); } else { do { const u32 word{memory_manager.Read(regs.semaphore_address.SemaphoreAddress())}; @@ -117,7 +118,8 @@ void Puller::ProcessSemaphoreTriggerMethod() { void Puller::ProcessSemaphoreRelease() { const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = regs.semaphore_release; - rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); + rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, + VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); } void Puller::ProcessSemaphoreAcquire() { diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 8459a3092..805a89900 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -55,6 +55,9 @@ public: // Unlike other fences, this one doesn't void SignalOrdering() { + if constexpr (!can_async_check) { + TryReleasePendingFences(); + } std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.AccumulateFlushes(); } @@ -104,13 +107,9 @@ public: SignalFence(std::move(func)); } - void WaitPendingFences(bool force) { + void WaitPendingFences([[maybe_unused]] bool force) { if constexpr (!can_async_check) { - if (force) { - TryReleasePendingFences(); - } else { - TryReleasePendingFences(); - } + TryReleasePendingFences(); } else { if (!force) { return; @@ -125,7 +124,8 @@ public: }); SignalFence(std::move(func)); std::unique_lock lk(wait_mutex); - wait_cv.wait(lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); }); + wait_cv.wait( + lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); }); } } diff --git 
a/src/video_core/query_cache/bank_base.h b/src/video_core/query_cache/bank_base.h index 4246a609d..420927091 100644 --- a/src/video_core/query_cache/bank_base.h +++ b/src/video_core/query_cache/bank_base.h @@ -7,21 +7,19 @@ #include #include - #include "common/common_types.h" namespace VideoCommon { class BankBase { protected: - const size_t base_bank_size; - size_t bank_size; - std::atomic references; - size_t current_slot; + const size_t base_bank_size{}; + size_t bank_size{}; + std::atomic references{}; + size_t current_slot{}; public: - BankBase(size_t bank_size_) - : base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {} + explicit BankBase(size_t bank_size_) : base_bank_size{bank_size_}, bank_size(bank_size_) {} virtual ~BankBase() = default; @@ -58,11 +56,11 @@ public: bank_size = current_slot; } - constexpr bool IsClosed() { + bool IsClosed() const { return current_slot >= bank_size; } - bool IsDead() { + bool IsDead() const { return IsClosed() && references == 0; } }; diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h index 0ae23af9f..993a13eac 100644 --- a/src/video_core/query_cache/query_base.h +++ b/src/video_core/query_cache/query_base.h @@ -9,28 +9,28 @@ namespace VideoCommon { enum class QueryFlagBits : u32 { - HasTimestamp = 1 << 0, ///< Indicates if this query has a tiemstamp. - IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host - IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host - IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. - IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query - IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query - IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. - IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. 
- IsFence = 1 << 8, ///< Indicates the query is a fence. - IsQueuedForAsyncFlush = 1 <<9,///< Indicates that the query can be flushed at any moment + HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp. + IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host + IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host + IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. + IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query + IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query + IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. + IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. + IsFence = 1 << 8, ///< Indicates the query is a fence. + IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment }; DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) class QueryBase { public: - VAddr guest_address; - QueryFlagBits flags; - u64 value; + VAddr guest_address{}; + QueryFlagBits flags{}; + u64 value{}; protected: // Default constructor - QueryBase() : guest_address(0), flags{}, value{} {} + QueryBase() = default; // Parameterized constructor QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) @@ -51,23 +51,21 @@ public: class HostQueryBase : public QueryBase { public: // Default constructor - HostQueryBase() - : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{}, - size_banks{}, start_slot{}, size_slots{} {} + HostQueryBase() : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0) {} // Parameterized constructor - HostQueryBase(bool isLong, VAddr address) + HostQueryBase(bool has_timestamp, VAddr address) : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{}, start_slot{}, size_slots{} { - if (isLong) { + if (has_timestamp) { flags |= 
QueryFlagBits::HasTimestamp; } } - u32 start_bank_id; - u32 size_banks; - size_t start_slot; - size_t size_slots; + u32 start_bank_id{}; + u32 size_banks{}; + size_t start_slot{}; + size_t size_slots{}; }; } // namespace VideoCommon \ No newline at end of file diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index f1393d5c7..042af053c 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -54,7 +54,7 @@ public: return new_id; } - bool HasPendingSync() override { + bool HasPendingSync() const override { return !pending_sync.empty(); } @@ -71,8 +71,10 @@ public: continue; } query.flags |= QueryFlagBits::IsHostSynced; - sync_values.emplace_back(query.guest_address, query.value, - True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4); + sync_values.emplace_back(SyncValuesStruct{ + .address = query.guest_address, + .value = query.value, + .size = static_cast(True(query.flags & QueryFlagBits::HasTimestamp) ? 
8 : 4)}); } pending_sync.clear(); if (sync_values.size() > 0) { @@ -90,15 +92,20 @@ class StubStreamer : public GuestStreamer { public: using RuntimeType = typename Traits::RuntimeType; - StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer(id_, runtime_) {} + StubStreamer(size_t id_, RuntimeType& runtime_, u32 stub_value_) + : GuestStreamer(id_, runtime_), stub_value{stub_value_} {} ~StubStreamer() override = default; size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, std::optional subreport = std::nullopt) override { - size_t new_id = GuestStreamer::WriteCounter(address, has_timestamp, 1U, subreport); + size_t new_id = + GuestStreamer::WriteCounter(address, has_timestamp, stub_value, subreport); return new_id; } + +private: + u32 stub_value; }; template @@ -113,7 +120,7 @@ struct QueryCacheBase::QueryCacheBaseImpl { for (size_t i = 0; i < static_cast(QueryType::MaxQueryTypes); i++) { streamers[i] = runtime.GetStreamerInterface(static_cast(i)); if (streamers[i]) { - streamer_mask |= 1ULL << i; + streamer_mask |= 1ULL << streamers[i]->GetId(); } } } @@ -152,7 +159,7 @@ struct QueryCacheBase::QueryCacheBaseImpl { QueryCacheBase* owner; VideoCore::RasterizerInterface& rasterizer; Core::Memory::Memory& cpu_memory; - Traits::RuntimeType& runtime; + RuntimeType& runtime; Tegra::GPU& gpu; std::array(QueryType::MaxQueryTypes)> streamers; u64 streamer_mask; @@ -223,15 +230,11 @@ void QueryCacheBase::CounterReport(GPUVAddr addr, QueryType counter_type const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); size_t streamer_id = static_cast(counter_type); auto* streamer = impl->streamers[streamer_id]; - if (!streamer) [[unlikely]] { - if (has_timestamp) { - u64 timestamp = impl->gpu.GetTicks(); - gpu_memory->Write(addr + 8, timestamp); - gpu_memory->Write(addr, 1ULL); - } else { - gpu_memory->Write(addr, 1U); - } - return; + if (streamer == nullptr) [[unlikely]] { + counter_type = QueryType::Payload; + payload = 1U; + 
streamer_id = static_cast(counter_type); + streamer = impl->streamers[streamer_id]; } auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); if (!cpu_addr_opt) [[unlikely]] { @@ -403,12 +406,6 @@ bool QueryCacheBase::AccelerateHostConditionalRendering() { impl->runtime.EndHostConditionalRendering(); return false; } - /*if (!Settings::IsGPULevelHigh()) { - impl->runtime.EndHostConditionalRendering(); - return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24, - VideoCommon::CacheType::BufferCache | - VideoCommon::CacheType::QueryCache); - }*/ const ComparisonMode mode = static_cast(regs.render_enable.mode); const GPUVAddr address = regs.render_enable.Address(); switch (mode) { @@ -442,6 +439,9 @@ bool QueryCacheBase::AccelerateHostConditionalRendering() { // Async downloads template void QueryCacheBase::CommitAsyncFlushes() { + // Make sure to have the results synced in Host. + NotifyWFI(); + u64 mask{}; { std::scoped_lock lk(impl->flush_guard); @@ -458,8 +458,19 @@ void QueryCacheBase::CommitAsyncFlushes() { if (mask == 0) { return; } - impl->ForEachStreamerIn(mask, - [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); }); + u64 ran_mask = ~mask; + while (mask) { + impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) { + u64 dep_mask = streamer->GetDependentMask(); + if ((dep_mask & ~ran_mask) != 0) { + return; + } + u64 index = streamer->GetId(); + ran_mask |= (1ULL << index); + mask &= ~(1ULL << index); + streamer->PushUnsyncedQueries(); + }); + } } template @@ -489,13 +500,11 @@ void QueryCacheBase::PopAsyncFlushes() { if (mask == 0) { return; } - u64 ran_mask = 0; - u64 next_phase = 0; + u64 ran_mask = ~mask; while (mask) { - impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) { + impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) { u64 dep_mask = streamer->GetDependenceMask(); if ((dep_mask & ~ran_mask) != 0) { - next_phase |= dep_mask; return; } 
u64 index = streamer->GetId(); @@ -503,7 +512,6 @@ void QueryCacheBase::PopAsyncFlushes() { mask &= ~(1ULL << index); streamer->PopUnsyncedQueries(); }); - ran_mask |= next_phase; } } diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h index 55f508dd1..07be421c6 100644 --- a/src/video_core/query_cache/query_cache_base.h +++ b/src/video_core/query_cache/query_cache_base.h @@ -47,7 +47,7 @@ public: BitField<0, 27, u32> query_id; u32 raw; - std::pair unpack() { + std::pair unpack() const { return {static_cast(stream_id.Value()), static_cast(query_id.Value())}; } }; @@ -73,7 +73,7 @@ public: } } - static u64 BuildMask(std::span types) { + static u64 BuildMask(std::span types) { u64 mask = 0; for (auto query_type : types) { mask |= 1ULL << (static_cast(query_type)); @@ -160,7 +160,7 @@ protected: } } - using ContentCache = typename std::unordered_map>; + using ContentCache = std::unordered_map>; void InvalidateQuery(QueryLocation location); bool IsQueryDirty(QueryLocation location); @@ -175,7 +175,7 @@ protected: friend struct QueryCacheBaseImpl; friend RuntimeType; - std::unique_ptr impl; + std::unique_ptr impl; }; } // namespace VideoCommon \ No newline at end of file diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h index 0e9275565..e7aac955b 100644 --- a/src/video_core/query_cache/query_stream.h +++ b/src/video_core/query_cache/query_stream.h @@ -16,7 +16,7 @@ namespace VideoCommon { class StreamerInterface { public: - StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {} + explicit StreamerInterface(size_t id_) : id{id_}, dependence_mask{}, dependent_mask{} {} virtual ~StreamerInterface() = default; virtual QueryBase* GetQuery(size_t id) = 0; @@ -37,7 +37,7 @@ public: /* Do Nothing */ } - virtual bool HasPendingSync() { + virtual bool HasPendingSync() const { return false; } @@ -52,7 +52,7 @@ public: virtual 
size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, std::optional subreport = std::nullopt) = 0; - virtual bool HasUnsyncedQueries() { + virtual bool HasUnsyncedQueries() const { return false; } @@ -71,18 +71,28 @@ public: } u64 GetDependenceMask() const { - return dependance_mask; + return dependence_mask; + } + + u64 GetDependentMask() const { + return dependence_mask; } protected: + void MakeDependent(StreamerInterface* depend_on) { + dependence_mask |= 1ULL << depend_on->id; + depend_on->dependent_mask |= 1ULL << id; + } + const size_t id; - const u64 dependance_mask; + u64 dependence_mask; + u64 dependent_mask; }; template class SimpleStreamer : public StreamerInterface { public: - SimpleStreamer(size_t id_, u64 dependance_mask_ = 0) : StreamerInterface{id_, dependance_mask_} {} + explicit SimpleStreamer(size_t id_) : StreamerInterface{id_} {} virtual ~SimpleStreamer() = default; protected: diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 2ba7cbb0d..af1469147 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -9,10 +9,10 @@ #include #include "common/common_types.h" #include "common/polyfill_thread.h" -#include "video_core/query_cache/types.h" #include "video_core/cache_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" +#include "video_core/query_cache/types.h" #include "video_core/rasterizer_download_area.h" namespace Tegra { @@ -57,7 +57,8 @@ public: virtual void ResetCounter(VideoCommon::QueryType type) = 0; /// Records a GPU query and caches it - virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0; + virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, + VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0; /// Signal an uniform buffer binding virtual void BindGraphicsUniformBuffer(size_t stage, u32 
index, GPUVAddr gpu_addr, diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 57a8c4c85..23001eeb8 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -43,7 +43,8 @@ public: void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCommon::QueryType type) override; - void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; + void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, + VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a975bbe75..27e2de1bf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -405,8 +405,6 @@ void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { if (type == VideoCommon::QueryType::ZPassPixelCount64) { - std::optional timestamp{True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout) - ? 
std::make_optional(gpu.GetTicks()) : std:: nullopt }; if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); } else { @@ -414,13 +412,23 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, } return; } - if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { - u64 ticks = gpu.GetTicks(); - gpu_memory->Write(gpu_addr + 8, ticks); - gpu_memory->Write(gpu_addr, static_cast(payload)); - } else { - gpu_memory->Write(gpu_addr, payload); + if (type != VideoCommon::QueryType::Payload) { + payload = 1u; + } + std::function func([this, gpu_addr, flags, memory_manager = gpu_memory, payload]() { + if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { + u64 ticks = gpu.GetTicks(); + memory_manager->Write(gpu_addr + 8, ticks); + memory_manager->Write(gpu_addr, static_cast(payload)); + } else { + memory_manager->Write(gpu_addr, payload); + } + }); + if (True(flags & VideoCommon::QueryPropertiesFlags::IsAFence)) { + SignalFence(std::move(func)); + return; } + func(); } void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 05e048e15..ceffe1f1e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -87,7 +87,8 @@ public: void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCommon::QueryType type) override; - void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; + void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, + VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void 
DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 97cd4521d..039dc95e1 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -303,9 +303,9 @@ std::pair QuadIndexedPass::Assemble( return {staging.buffer, staging.offset}; } -ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(const Device& device_, - Scheduler& scheduler_, - DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_) +ConditionalRenderingResolvePass::ConditionalRenderingResolvePass( + const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, + ComputePassDescriptorQueue& compute_pass_descriptor_queue_) : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr, RESOLVE_CONDITIONAL_RENDER_COMP_SPV), diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 14fc5ad71..336573574 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -7,8 +7,8 @@ #include "video_core/fence_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" -#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" namespace Core { class System; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index ef891e26b..add0c6fb3 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -11,11 +11,9 @@ #include #include -#include -#include - #include "common/common_types.h" 
#include "core/memory.h" +#include "video_core/engines/draw_manager.h" #include "video_core/query_cache/query_cache.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" @@ -30,6 +28,7 @@ namespace Vulkan { +using Tegra::Engines::Maxwell3D; using VideoCommon::QueryType; namespace { @@ -37,7 +36,7 @@ class SamplesQueryBank : public VideoCommon::BankBase { public: static constexpr size_t BANK_SIZE = 256; static constexpr size_t QUERY_SIZE = 8; - SamplesQueryBank(const Device& device_, size_t index_) + explicit SamplesQueryBank(const Device& device_, size_t index_) : BankBase(BANK_SIZE), device{device_}, index{index_} { const auto& dev = device.GetLogical(); query_pool = dev.CreateQueryPool({ @@ -109,18 +108,19 @@ struct HostSyncValues { static constexpr bool GeneratesBaseBuffer = false; }; -template class SamplesStreamer : public BaseStreamer { public: - SamplesStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, - Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) - : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, + explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_, + Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) + : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_} { BuildResolveBuffer(); current_bank = nullptr; current_query = nullptr; } + ~SamplesStreamer() = default; + void StartCounter() override { if (has_started) { return; @@ -157,7 +157,7 @@ public: PauseCounter(); } - bool HasPendingSync() override { + bool HasPendingSync() const override { return !pending_sync.empty(); } @@ -198,7 +198,7 @@ public: } resolve_slots_remaining = resolve_slots; sync_values_stash.emplace_back(); - sync_values = sync_values = &sync_values_stash.back(); + sync_values = &sync_values_stash.back(); sync_values->reserve(resolve_slots * 
SamplesQueryBank::BANK_SIZE); } resolve_slots_remaining--; @@ -207,6 +207,7 @@ public: const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * (resolve_slots - resolve_slots_remaining - 1); VkQueryPool query_pool = bank->GetInnerPool(); + scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([start, amount, base_offset, query_pool, buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) { size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE; @@ -284,7 +285,7 @@ public: return index; } - bool HasUnsyncedQueries() override { + bool HasUnsyncedQueries() const override { return !pending_flush_queries.empty(); } @@ -348,8 +349,8 @@ private: for (auto q : queries) { auto* query = GetQuery(q); ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) { - auto id = bank->GetIndex(); - auto pair = indexer.try_emplace(id, std::numeric_limits::max(), + auto id_ = bank->GetIndex(); + auto pair = indexer.try_emplace(id_, std::numeric_limits::max(), std::numeric_limits::min()); auto& current_pair = pair.first->second; current_pair.first = std::min(current_pair.first, start); @@ -434,13 +435,14 @@ private: .pNext = nullptr, .flags = 0, .size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots, - .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, }; resolve_buffers.emplace_back( - std::move(memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal))); + memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal)); } static constexpr size_t resolve_slots = 8; @@ -476,7 +478,8 @@ class TFBQueryBank : public VideoCommon::BankBase { public: static constexpr size_t BANK_SIZE = 1024; static constexpr size_t 
QUERY_SIZE = 4; - TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, size_t index_) + explicit TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, + size_t index_) : BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} { const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, @@ -525,22 +528,21 @@ private: vk::Buffer buffer; }; -template class PrimitivesSucceededStreamer; -template class TFBCounterStreamer : public BaseStreamer { public: - TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, - Scheduler& scheduler_, const MemoryAllocator& memory_allocator_, - StagingBufferPool& staging_pool_) - : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, + explicit TFBCounterStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_, + Scheduler& scheduler_, const MemoryAllocator& memory_allocator_, + StagingBufferPool& staging_pool_) + : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_}, staging_pool{staging_pool_} { buffers_count = 0; current_bank = nullptr; counter_buffers.fill(VK_NULL_HANDLE); offsets.fill(0); last_queries.fill(0); + last_queries_stride.fill(1); const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -564,6 +566,8 @@ public: } } + ~TFBCounterStreamer() = default; + void StartCounter() override { FlushBeginTFB(); has_started = true; @@ -581,15 +585,15 @@ public: if (has_flushed_end_pending) { FlushEndTFB(); } - runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { - if (regs.transform_feedback_enabled == 0) { + runtime.View3DRegs([this](Maxwell3D& maxwell3d) { + if (maxwell3d.regs.transform_feedback_enabled == 0) { streams_mask = 0; has_started = false; } }); } - bool HasPendingSync() override { + bool HasPendingSync() const override { return !pending_sync.empty(); } @@ 
-650,14 +654,19 @@ public: return index; } - std::optional GetLastQueryStream(size_t stream) { + std::optional> GetLastQueryStream(size_t stream) { if (last_queries[stream] != 0) { - return {last_queries[stream]}; + std::pair result(last_queries[stream], last_queries_stride[stream]); + return result; } return std::nullopt; } - bool HasUnsyncedQueries() override { + Maxwell3D::Regs::PrimitiveTopology GetOutputTopology() const { + return out_topology; + } + + bool HasUnsyncedQueries() const override { return !pending_flush_queries.empty(); } @@ -762,15 +771,17 @@ private: void UpdateBuffers() { last_queries.fill(0); - runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { + last_queries_stride.fill(1); + runtime.View3DRegs([this](Maxwell3D& maxwell3d) { buffers_count = 0; - for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; - i++) { - const auto& tf = regs.transform_feedback; + out_topology = maxwell3d.draw_manager->GetDrawState().topology; + for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) { + const auto& tf = maxwell3d.regs.transform_feedback; if (tf.buffers[i].enable == 0) { continue; } const size_t stream = tf.controls[i].stream; + last_queries_stride[stream] = tf.controls[i].stride; streams_mask |= 1ULL << stream; buffers_count = std::max(buffers_count, stream + 1); } @@ -785,7 +796,8 @@ private: }); current_bank = &bank_pool.GetBank(current_bank_id); } - auto [dont_care, slot] = current_bank->Reserve(); + auto [dont_care, other] = current_bank->Reserve(); + const size_t slot = other; // workaround to compile bug. 
current_bank->AddReference(); static constexpr VkMemoryBarrier READ_BARRIER{ @@ -818,11 +830,9 @@ private: return {current_bank_id, slot}; } - template friend class PrimitivesSucceededStreamer; static constexpr size_t NUM_STREAMS = 4; - static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL; QueryCacheRuntime& runtime; const Device& device; @@ -851,6 +861,8 @@ private: std::array counter_buffers{}; std::array offsets{}; std::array last_queries; + std::array last_queries_stride; + Maxwell3D::Regs::PrimitiveTopology out_topology; u64 streams_mask; }; @@ -858,32 +870,34 @@ class PrimitivesQueryBase : public VideoCommon::QueryBase { public: // Default constructor PrimitivesQueryBase() - : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, - dependant_index{}, dependant_manage{} {} + : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {} // Parameterized constructor - PrimitivesQueryBase(bool is_long, VAddr address) - : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, - dependant_index{}, dependant_manage{} { - if (is_long) { + PrimitivesQueryBase(bool has_timestamp, VAddr address) + : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) { + if (has_timestamp) { flags |= VideoCommon::QueryFlagBits::HasTimestamp; } } - u64 stride; - VAddr dependant_address; - size_t dependant_index; - bool dependant_manage; + u64 stride{}; + VAddr dependant_address{}; + Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; + size_t dependant_index{}; + bool dependant_manage{}; }; -template class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer { public: - PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_, - TFBCounterStreamer& tfb_streamer_, Core::Memory::Memory& cpu_memory_) - : VideoCommon::SimpleStreamer( - id, 1ULL << static_cast(VideoCommon::QueryType::StreamingByteCount)), - 
runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {} + explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_, + TFBCounterStreamer& tfb_streamer_, + Core::Memory::Memory& cpu_memory_) + : VideoCommon::SimpleStreamer(id_), runtime{runtime_}, + tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} { + MakeDependent(&tfb_streamer); + } + + ~PrimitivesSucceededStreamer() = default; size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, std::optional subreport_) override { @@ -901,8 +915,11 @@ public: const size_t subreport = static_cast(*subreport_); auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport); bool must_manage_dependance = false; + new_query->topology = tfb_streamer.GetOutputTopology(); if (dependant_address_opt) { - new_query->dependant_address = *dependant_address_opt; + auto [dep_address, stride] = *dependant_address_opt; + new_query->dependant_address = dep_address; + new_query->stride = stride; } else { new_query->dependant_index = tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_); @@ -917,25 +934,28 @@ public: } return index; } + new_query->stride = 1; + runtime.View3DRegs([new_query, subreport](Maxwell3D& maxwell3d) { + for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) { + const auto& tf = maxwell3d.regs.transform_feedback; + if (tf.buffers[i].enable == 0) { + continue; + } + if (tf.controls[i].stream != subreport) { + continue; + } + new_query->stride = tf.controls[i].stride; + break; + } + }); } new_query->dependant_manage = must_manage_dependance; - runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) { - for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; - i++) { - const auto& tf = regs.transform_feedback; - if (tf.controls[i].stream != subreport) { - continue; - } - new_query->stride = tf.controls[i].stride; - break; - } - }); pending_flush_queries.push_back(index); 
return index; } - bool HasUnsyncedQueries() override { + bool HasUnsyncedQueries() const override { return !pending_flush_queries.empty(); } @@ -960,22 +980,49 @@ public: } query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; + u64 num_vertices = 0; if (query->dependant_manage) { auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index); - query->value = dependant_query->value / query->stride; + num_vertices = dependant_query->value / query->stride; tfb_streamer.Free(query->dependant_index); } else { u8* pointer = cpu_memory.GetPointer(query->dependant_address); u32 result; std::memcpy(&result, pointer, sizeof(u32)); - query->value = static_cast(result) / query->stride; + num_vertices = static_cast(result) / query->stride; } + query->value = [&]() -> u64 { + switch (query->topology) { + case Maxwell3D::Regs::PrimitiveTopology::Points: + return num_vertices; + case Maxwell3D::Regs::PrimitiveTopology::Lines: + return num_vertices / 2; + case Maxwell3D::Regs::PrimitiveTopology::LineLoop: + return (num_vertices / 2) + 1; + case Maxwell3D::Regs::PrimitiveTopology::LineStrip: + return num_vertices - 1; + case Maxwell3D::Regs::PrimitiveTopology::Patches: + case Maxwell3D::Regs::PrimitiveTopology::Triangles: + case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: + return num_vertices / 3; + case Maxwell3D::Regs::PrimitiveTopology::TriangleFan: + case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: + case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: + return num_vertices - 2; + case Maxwell3D::Regs::PrimitiveTopology::Quads: + return num_vertices / 4; + case Maxwell3D::Regs::PrimitiveTopology::Polygon: + return 1U; + default: + return num_vertices; + } + }(); } } private: QueryCacheRuntime& runtime; - TFBCounterStreamer& tfb_streamer; + TFBCounterStreamer& tfb_streamer; Core::Memory::Memory& cpu_memory; // syncing queue @@ -1005,7 +1052,10 @@ struct QueryCacheRuntimeImpl { 
tfb_streamer(static_cast(QueryType::StreamingByteCount), runtime, device, scheduler, memory_allocator, staging_pool), primitives_succeeded_streamer( - static_cast(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_), + static_cast(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, + cpu_memory_), + primitives_needed_minus_suceeded_streamer( + static_cast(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u), hcr_setup{}, hcr_is_set{}, is_hcr_running{} { hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; @@ -1040,9 +1090,10 @@ struct QueryCacheRuntimeImpl { // Streamers VideoCommon::GuestStreamer guest_streamer; - SamplesStreamer sample_streamer; - TFBCounterStreamer tfb_streamer; - PrimitivesSucceededStreamer primitives_succeeded_streamer; + SamplesStreamer sample_streamer; + TFBCounterStreamer tfb_streamer; + PrimitivesSucceededStreamer primitives_succeeded_streamer; + VideoCommon::StubStreamer primitives_needed_minus_suceeded_streamer; std::vector> little_cache; std::vector> buffers_to_upload_to; @@ -1059,7 +1110,7 @@ struct QueryCacheRuntimeImpl { bool is_hcr_running; // maxwell3d - Tegra::Engines::Maxwell3D* maxwell3d; + Maxwell3D* maxwell3d; }; QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, @@ -1074,13 +1125,13 @@ QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, staging_pool_, compute_pass_descriptor_queue, descriptor_pool); } -void QueryCacheRuntime::Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d) { +void QueryCacheRuntime::Bind3DEngine(Maxwell3D* maxwell3d) { impl->maxwell3d = maxwell3d; } template void QueryCacheRuntime::View3DRegs(Func&& func) { - func(impl->maxwell3d->regs); + func(*impl->maxwell3d); } void QueryCacheRuntime::EndHostConditionalRendering() { @@ -1240,8 +1291,12 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp return &impl->sample_streamer; case 
QueryType::StreamingByteCount: return &impl->tfb_streamer; + case QueryType::StreamingPrimitivesNeeded: + case QueryType::VtgPrimitivesOut: case QueryType::StreamingPrimitivesSucceeded: return &impl->primitives_succeeded_streamer; + case QueryType::StreamingPrimitivesNeededMinusSucceeded: + return &impl->primitives_needed_minus_suceeded_streamer; default: return nullptr; } diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 9ad2929d7..e9a1ea169 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -49,7 +49,8 @@ public: bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty); bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, - VideoCommon::LookupData object_2, bool qc_dirty, bool equal_check); + VideoCommon::LookupData object_2, bool qc_dirty, + bool equal_check); VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type); @@ -66,7 +67,7 @@ private: }; struct QueryCacheParams { - using RuntimeType = Vulkan::QueryCacheRuntime; + using RuntimeType = typename Vulkan::QueryCacheRuntime; }; using QueryCache = VideoCommon::QueryCacheBase; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e8862ba04..c7ce7c312 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -194,15 +194,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { query_cache.NotifySegment(true); -#if ANDROID - if (Settings::IsGPULevelHigh()) { - // This is problematic on Android, disable on GPU Normal. 
- // query_cache.UpdateCounters(); - } -#else - // query_cache.UpdateCounters(); -#endif - GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; if (!pipeline) { return; @@ -294,15 +285,6 @@ void RasterizerVulkan::DrawTexture() { query_cache.NotifySegment(true); -#if ANDROID - if (Settings::IsGPULevelHigh()) { - // This is problematic on Android, disable on GPU Normal. - // query_cache.UpdateCounters(); - } -#else - // query_cache.UpdateCounters(); -#endif - texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.UpdateRenderTargets(false); @@ -332,15 +314,6 @@ void RasterizerVulkan::Clear(u32 layer_count) { FlushWork(); gpu_memory->FlushCaching(); -#if ANDROID - if (Settings::IsGPULevelHigh()) { - // This is problematic on Android, disable on GPU Normal. - // query_cache.UpdateCounters(); - } -#else - // query_cache.UpdateCounters(); -#endif - query_cache.NotifySegment(true); query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ffd44c68d..ad069556c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -85,7 +85,8 @@ public: void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCommon::QueryType type) override; - void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; + void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, + VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h 
b/src/video_core/renderer_vulkan/vk_scheduler.h index c87e5fb07..da03803aa 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -15,9 +15,13 @@ #include "common/common_types.h" #include "common/polyfill_thread.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" -#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace VideoCommon { +template +class QueryCacheBase; +} + namespace Vulkan { class CommandPool; @@ -26,6 +30,8 @@ class Framebuffer; class GraphicsPipeline; class StateTracker; +struct QueryCacheParams; + /// The scheduler abstracts command buffer and fence management with an interface that's able to do /// OpenGL-like operations on Vulkan command buffers. class Scheduler { @@ -63,7 +69,7 @@ public: void InvalidateState(); /// Assigns the query cache. - void SetQueryCache(QueryCache& query_cache_) { + void SetQueryCache(VideoCommon::QueryCacheBase& query_cache_) { query_cache = &query_cache_; } @@ -219,7 +225,7 @@ private: std::unique_ptr master_semaphore; std::unique_ptr command_pool; - QueryCache* query_cache = nullptr; + VideoCommon::QueryCacheBase* query_cache = nullptr; vk::CommandBuffer current_cmdbuf; -- cgit v1.2.3 From 2fea1b8407b66dd0e9ed1776c34dad043e1becf4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 19 Aug 2023 21:49:38 +0200 Subject: Query Cache: Fix guest side sample counting --- src/video_core/engines/maxwell_3d.cpp | 6 --- src/video_core/query_cache/query_base.h | 19 ++++--- src/video_core/query_cache/query_cache.h | 46 +++++++++-------- src/video_core/query_cache/query_stream.h | 10 ++++ src/video_core/renderer_vulkan/vk_query_cache.cpp | 62 ++++++++++++++++++++--- 5 files changed, 97 insertions(+), 46 deletions(-) (limited to 'src/video_core/query_cache') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 46b9c548a..32d767d85 100644 --- 
a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -586,12 +586,6 @@ void Maxwell3D::ProcessQueryCondition() { } void Maxwell3D::ProcessCounterReset() { -#if ANDROID - if (!Settings::IsGPULevelHigh()) { - // This is problematic on Android, disable on GPU Normal. - return; - } -#endif switch (regs.clear_report_value) { case Regs::ClearReport::ZPassPixelCount: rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h index 993a13eac..1d786b3a7 100644 --- a/src/video_core/query_cache/query_base.h +++ b/src/video_core/query_cache/query_base.h @@ -9,16 +9,15 @@ namespace VideoCommon { enum class QueryFlagBits : u32 { - HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp. - IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host - IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host - IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. - IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query - IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query - IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. - IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. - IsFence = 1 << 8, ///< Indicates the query is a fence. - IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment + HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp. + IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host + IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host + IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. 
+ IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query + IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query + IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. + IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. + IsFence = 1 << 8, ///< Indicates the query is a fence. }; DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index 042af053c..4b89b5bf6 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -256,30 +256,32 @@ void QueryCacheBase::CounterReport(GPUVAddr addr, QueryType counter_type u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); bool is_synced = !Settings::IsGPULevelHigh() && is_fence; - std::function operation( - [this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] { - if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { - if (!is_synced) [[likely]] { - impl->pending_unregister.push_back(query_location); - } - return; - } - if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { - UNREACHABLE(); - return; - } - if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { - u64 timestamp = impl->gpu.GetTicks(); - std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); - std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); - } else { - u32 value = static_cast(query_base->value); - std::memcpy(pointer, &value, sizeof(value)); - } + std::function operation([this, is_synced, streamer, query_base = query, query_location, + pointer, pointer_timestamp] { + if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { if (!is_synced) [[likely]] { impl->pending_unregister.push_back(query_location); } - }); + return; + } + if (False(query_base->flags & 
QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { + UNREACHABLE(); + return; + } + query_base->value += streamer->GetAmmendValue(); + streamer->SetAccumulationValue(query_base->value); + if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { + u64 timestamp = impl->gpu.GetTicks(); + std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); + std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); + } else { + u32 value = static_cast(query_base->value); + std::memcpy(pointer, &value, sizeof(value)); + } + if (!is_synced) [[likely]] { + impl->pending_unregister.push_back(query_location); + } + }); if (is_fence) { impl->rasterizer.SignalFence(std::move(operation)); } else { @@ -354,9 +356,9 @@ void QueryCacheBase::NotifySegment(bool resume) { if (resume) { impl->runtime.ResumeHostConditionalRendering(); } else { - impl->runtime.PauseHostConditionalRendering(); CounterClose(VideoCommon::QueryType::ZPassPixelCount64); CounterClose(VideoCommon::QueryType::StreamingByteCount); + impl->runtime.PauseHostConditionalRendering(); } } diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h index e7aac955b..39da6ac07 100644 --- a/src/video_core/query_cache/query_stream.h +++ b/src/video_core/query_cache/query_stream.h @@ -78,6 +78,14 @@ public: return dependence_mask; } + u64 GetAmmendValue() const { + return ammend_value; + } + + void SetAccumulationValue(u64 new_value) { + acumulation_value = new_value; + } + protected: void MakeDependent(StreamerInterface* depend_on) { dependence_mask |= 1ULL << depend_on->id; @@ -87,6 +95,8 @@ protected: const size_t id; u64 dependence_mask; u64 dependent_mask; + u64 ammend_value{}; + u64 acumulation_value{}; }; template diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index add0c6fb3..2147776f8 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ 
-110,13 +110,16 @@ struct HostSyncValues { class SamplesStreamer : public BaseStreamer { public: - explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_, + explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, + VideoCore::RasterizerInterface* rasterizer_, const Device& device_, Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) - : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_}, - memory_allocator{memory_allocator_} { + : BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_}, + scheduler{scheduler_}, memory_allocator{memory_allocator_} { BuildResolveBuffer(); current_bank = nullptr; current_query = nullptr; + ammend_value = 0; + acumulation_value = 0; } ~SamplesStreamer() = default; @@ -151,6 +154,11 @@ public: PauseCounter(); } AbandonCurrentQuery(); + std::function func([this, counts = pending_flush_queries.size()] { + ammend_value = 0; + acumulation_value = 0; + }); + rasterizer->SyncOperation(std::move(func)); } void CloseCounter() override { @@ -244,7 +252,7 @@ public: } if (query->size_slots > 1) { // This is problematic. 
- UNIMPLEMENTED(); + // UNIMPLEMENTED(); } query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; auto loc_data = offsets[query->start_bank_id]; @@ -255,16 +263,20 @@ public: }); } + ReplicateCurrentQueryIfNeeded(); + std::function func([this] { ammend_value = acumulation_value; }); + rasterizer->SyncOperation(std::move(func)); AbandonCurrentQuery(); pending_sync.clear(); } size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, [[maybe_unused]] std::optional subreport) override { + PauseCounter(); auto index = BuildQuery(); auto* new_query = GetQuery(index); new_query->guest_address = address; - new_query->value = 100; + new_query->value = 0; new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; if (has_timestamp) { new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; @@ -291,6 +303,7 @@ public: void PushUnsyncedQueries() override { PauseCounter(); + current_bank->Close(); { std::scoped_lock lk(flush_guard); pending_flush_sets.emplace_back(std::move(pending_flush_queries)); @@ -429,6 +442,34 @@ private: current_query_id = 0; } + void ReplicateCurrentQueryIfNeeded() { + if (pending_sync.empty()) { + return; + } + if (!current_query) { + return; + } + auto index = BuildQuery(); + auto* new_query = GetQuery(index); + new_query->guest_address = 0; + new_query->value = 0; + new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; + new_query->start_bank_id = current_query->start_bank_id; + new_query->size_banks = current_query->size_banks; + new_query->start_slot = current_query->start_slot; + new_query->size_slots = current_query->size_slots; + ApplyBankOp(new_query, [](SamplesQueryBank* bank, size_t start, size_t amount) { + bank->AddReference(amount); + }); + pending_flush_queries.push_back(index); + std::function func([this, index] { + auto* query = GetQuery(index); + query->value += GetAmmendValue(); + SetAccumulationValue(query->value); + Free(index); + }); + } + void BuildResolveBuffer() { const VkBufferCreateInfo buffer_ci = { 
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, @@ -448,6 +489,7 @@ private: static constexpr size_t resolve_slots = 8; QueryCacheRuntime& runtime; + VideoCore::RasterizerInterface* rasterizer; const Device& device; Scheduler& scheduler; const MemoryAllocator& memory_allocator; @@ -470,6 +512,7 @@ private: size_t current_query_id; VideoCommon::HostQueryBase* current_query; bool has_started{}; + bool current_unset{}; std::mutex flush_guard; }; @@ -677,7 +720,6 @@ public: size_t offset_base = staging_ref.offset; for (auto q : pending_flush_queries) { auto* query = GetQuery(q); - query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush; auto& bank = bank_pool.GetBank(query->start_bank_id); bank.Sync(staging_ref, offset_base, query->start_slot, 1); offset_base += TFBQueryBank::QUERY_SIZE; @@ -1047,8 +1089,8 @@ struct QueryCacheRuntimeImpl { buffer_cache{buffer_cache_}, device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, guest_streamer(0, runtime), - sample_streamer(static_cast(QueryType::ZPassPixelCount64), runtime, device, - scheduler, memory_allocator), + sample_streamer(static_cast(QueryType::ZPassPixelCount64), runtime, rasterizer, + device, scheduler, memory_allocator), tfb_streamer(static_cast(QueryType::StreamingByteCount), runtime, device, scheduler, memory_allocator, staging_pool), primitives_succeeded_streamer( @@ -1277,6 +1319,10 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku return true; } } + if (!is_in_bc[0] && !is_in_bc[1]) { + // Both queries are in query cache, it's best to just flush. 
+ return false; + } HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); return true; } -- cgit v1.2.3 From bf0d6b8806b7367a17bbeb2bb59f4bcba1fb1375 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 22 Aug 2023 17:44:03 +0200 Subject: Query Cache: Fix behavior in Normal Accuracy --- src/video_core/query_cache/query_cache.h | 13 +++++++++++++ src/video_core/renderer_vulkan/vk_query_cache.cpp | 10 ++++++++++ 2 files changed, 23 insertions(+) (limited to 'src/video_core/query_cache') diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index 4b89b5bf6..78b42b518 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -256,6 +256,7 @@ void QueryCacheBase::CounterReport(GPUVAddr addr, QueryType counter_type u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); bool is_synced = !Settings::IsGPULevelHigh() && is_fence; + std::function operation([this, is_synced, streamer, query_base = query, query_location, pointer, pointer_timestamp] { if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { @@ -285,6 +286,18 @@ void QueryCacheBase::CounterReport(GPUVAddr addr, QueryType counter_type if (is_fence) { impl->rasterizer.SignalFence(std::move(operation)); } else { + if (!Settings::IsGPULevelHigh() && counter_type == QueryType::Payload) { + if (has_timestamp) { + u64 timestamp = impl->gpu.GetTicks(); + u64 value = static_cast(payload); + std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); + std::memcpy(pointer, &value, sizeof(value)); + } else { + std::memcpy(pointer, &payload, sizeof(payload)); + } + streamer->Free(new_query_id); + return; + } impl->rasterizer.SyncOperation(std::move(operation)); } if (is_synced) { diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 825e1a72e..2cc007716 100644 --- 
a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -1365,6 +1365,11 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku return false; } + const bool is_gpu_high = Settings::IsGPULevelHigh(); + if (!is_gpu_high && impl->device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + return true; + } + for (size_t i = 0; i < 2; i++) { is_null[i] = !is_in_ac[i] && check_value(objects[i]->address); } @@ -1376,6 +1381,11 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku return true; } } + + if (!is_gpu_high) { + return true; + } + if (!is_in_bc[0] && !is_in_bc[1]) { // Both queries are in query cache, it's best to just flush. return true; -- cgit v1.2.3