query_cache: Abstract OpenGL implementation

Abstract the current OpenGL query cache implementation into the VideoCommon namespace and reimplement the OpenGL query cache on top of that shared base. Doing this avoids repeating the same code and logic in the Vulkan implementation.
author: ReinUsesLisp <reinuseslisp@airmail.cc> 2020-02-11 16:02:41 -0300
committer: ReinUsesLisp <reinuseslisp@airmail.cc> 2020-02-14 17:38:27 -0300
commit: c31382ced54c07650ae41fa2f75dc53da894784e (patch)
tree: aeff230f51000fc357bbc2859c5ef66892f9a841 /src/video_core/renderer_opengl
parent: 73d2d3342dc8867d32f08f89b2ca36ff071598dc (diff)
2 files changed, 70 insertions, 339 deletions
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 74cb73209..7d5a044c7 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -20,211 +20,49 @@
 
 namespace OpenGL {
 
-using VideoCore::QueryType;
-
 namespace {
 
-constexpr std::uintptr_t PAGE_SIZE = 4096;
-constexpr int PAGE_SHIFT = 12;
-
-constexpr std::size_t SMALL_QUERY_SIZE = 8;  // Query size without timestamp
-constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp
-constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8;
-
 constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
 
-constexpr GLenum GetTarget(QueryType type) {
+constexpr GLenum GetTarget(VideoCore::QueryType type) {
     return QueryTargets[static_cast<std::size_t>(type)];
 }
 
 } // Anonymous namespace
 
-CounterStream::CounterStream(QueryCache& cache, QueryType type)
-    : cache{cache}, type{type}, target{GetTarget(type)} {}
-
-CounterStream::~CounterStream() = default;
-
-void CounterStream::Update(bool enabled, bool any_command_queued) {
-    if (enabled) {
-        Enable();
-    } else {
-        Disable(any_command_queued);
-    }
-}
-
-void CounterStream::Reset(bool any_command_queued) {
-    if (current) {
-        EndQuery(any_command_queued);
-
-        // Immediately start a new query to avoid disabling its state.
-        current = cache.GetHostCounter(nullptr, type);
-    }
-    last = nullptr;
-}
-
-std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) {
-    if (!current) {
-        return nullptr;
-    }
-    EndQuery(any_command_queued);
-    last = std::move(current);
-    current = cache.GetHostCounter(last, type);
-    return last;
-}
-
-void CounterStream::Enable() {
-    if (current) {
-        return;
-    }
-    current = cache.GetHostCounter(last, type);
-}
-
-void CounterStream::Disable(bool any_command_queued) {
-    if (current) {
-        EndQuery(any_command_queued);
-    }
-    last = std::exchange(current, nullptr);
-}
-
-void CounterStream::EndQuery(bool any_command_queued) {
-    if (!any_command_queued) {
-        // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
-        // having any of these causes a lock. glFlush is considered a command, so we can safely wait
-        // for this. Insert to the OpenGL command stream a flush.
-        glFlush();
-    }
-    glEndQuery(target);
-}
-
-QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer)
-    : system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this,
-                                                                     QueryType::SamplesPassed}}} {}
+QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
+    : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
+                                  HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>(
+                                                           gl_rasterizer)},
+      gl_rasterizer{gl_rasterizer} {}
 
 QueryCache::~QueryCache() = default;
 
-void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) {
-    const u64 addr_begin = static_cast<u64>(addr);
-    const u64 addr_end = addr_begin + static_cast<u64>(size);
-    const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
-        const u64 cache_begin = query.GetCacheAddr();
-        const u64 cache_end = cache_begin + query.GetSizeInBytes();
-        return cache_begin < addr_end && addr_begin < cache_end;
-    };
-
-    const u64 page_end = addr_end >> PAGE_SHIFT;
-    for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
-        const auto& it = cached_queries.find(page);
-        if (it == std::end(cached_queries)) {
-            continue;
-        }
-        auto& contents = it->second;
-        for (auto& query : contents) {
-            if (!in_range(query)) {
-                continue;
-            }
-            rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1);
-            Flush(query);
-        }
-        contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
-                       std::end(contents));
-    }
-}
-
-void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) {
-    // We can handle flushes in the same way as invalidations.
-    InvalidateRegion(addr, size);
-}
-
-void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) {
-    auto& memory_manager = system.GPU().MemoryManager();
-    const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-
-    CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
-    if (!query) {
-        const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
-        ASSERT_OR_EXECUTE(cpu_addr, return;);
-
-        query = &Register(CachedQuery(type, *cpu_addr, host_ptr));
-    }
-
-    query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp);
-}
-
-void QueryCache::UpdateCounters() {
-    auto& samples_passed = GetStream(QueryType::SamplesPassed);
-
-    const auto& regs = system.GPU().Maxwell3D().regs;
-    samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued());
-}
-
-void QueryCache::ResetCounter(QueryType type) {
-    GetStream(type).Reset(rasterizer.AnyCommandQueued());
-}
-
-void QueryCache::Reserve(QueryType type, OGLQuery&& query) {
-    reserved_queries[static_cast<std::size_t>(type)].push_back(std::move(query));
-}
-
-std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency,
-                                                        QueryType type) {
-    auto& reserve = reserved_queries[static_cast<std::size_t>(type)];
+OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
+    auto& reserve = queries_reserve[static_cast<std::size_t>(type)];
     OGLQuery query;
     if (reserve.empty()) {
         query.Create(GetTarget(type));
-    } else {
-        query = std::move(reserve.back());
-        reserve.pop_back();
+        return query;
     }
 
-    return std::make_shared<HostCounter>(*this, std::move(dependency), type, std::move(query));
+    query = std::move(reserve.back());
+    reserve.pop_back();
+    return query;
 }
 
-CachedQuery& QueryCache::Register(CachedQuery&& cached_query) {
-    const u64 page = static_cast<u64>(cached_query.GetCacheAddr()) >> PAGE_SHIFT;
-    auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query));
-    rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1);
-    return stored_ref;
-}
-
-CachedQuery* QueryCache::TryGet(CacheAddr addr) {
-    const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
-    const auto it = cached_queries.find(page);
-    if (it == std::end(cached_queries)) {
-        return nullptr;
-    }
-    auto& contents = it->second;
-    const auto found =
-        std::find_if(std::begin(contents), std::end(contents),
-                     [addr](const auto& query) { return query.GetCacheAddr() == addr; });
-    return found != std::end(contents) ? &*found : nullptr;
-}
-
-void QueryCache::Flush(CachedQuery& cached_query) {
-    auto& stream = GetStream(cached_query.GetType());
-
-    // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
-    // To avoid this disable and re-enable keeping the dependency stream.
-    // But we only have to do this if we have pending waits to be done.
-    const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending();
-    const bool any_command_queued = rasterizer.AnyCommandQueued();
-    if (slice_counter) {
-        stream.Update(false, any_command_queued);
-    }
-
-    cached_query.Flush();
-
-    if (slice_counter) {
-        stream.Update(true, any_command_queued);
-    }
+void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
+    queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query));
 }
 
-CounterStream& QueryCache::GetStream(QueryType type) {
-    return streams[static_cast<std::size_t>(type)];
+bool QueryCache::AnyCommandQueued() const noexcept {
+    return gl_rasterizer.AnyCommandQueued();
 }
 
-HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type,
-                         OGLQuery&& query_)
-    : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} {
+HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
+                         VideoCore::QueryType type)
+    : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
+      type{type}, query{cache.AllocateQuery(type)} {
     glBeginQuery(GetTarget(type), query.handle);
 }
 
@@ -232,81 +70,50 @@ HostCounter::~HostCounter() {
     cache.Reserve(type, std::move(query));
 }
 
-u64 HostCounter::Query() {
-    if (result) {
-        return *result;
-    }
-
-    u64 value;
-    glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
-    if (dependency) {
-        value += dependency->Query();
+void HostCounter::EndQuery() {
+    if (!cache.AnyCommandQueued()) {
+        // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
+        // having any of these causes a lock. glFlush is considered a command, so we can safely wait
+        // for this. Insert to the OpenGL command stream a flush.
+        glFlush();
     }
-
-    return *(result = value);
+    glEndQuery(GetTarget(type));
 }
 
-bool HostCounter::WaitPending() const noexcept {
-    return result.has_value();
+u64 HostCounter::BlockingQuery() const {
+    GLint64 value;
+    glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
+    return static_cast<u64>(value);
 }
 
-CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr)
-    : type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
+    : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
 
 CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
-    : type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr},
-      counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {}
-
-CachedQuery::~CachedQuery() = default;
+    : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
 
 CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
+    VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs));
+    cache = rhs.cache;
     type = rhs.type;
-    cpu_addr = rhs.cpu_addr;
-    host_ptr = rhs.host_ptr;
-    counter = std::move(rhs.counter);
-    timestamp = rhs.timestamp;
     return *this;
 }
 
 void CachedQuery::Flush() {
-    // When counter is nullptr it means that it's just been reseted. We are supposed to write a zero
-    // in these cases.
-    const u64 value = counter ? counter->Query() : 0;
-    std::memcpy(host_ptr, &value, sizeof(u64));
-
-    if (timestamp) {
-        std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
-    }
-}
-
-void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
-    if (counter) {
-        // If there's an old counter set it means the query is being rewritten by the game.
-        // To avoid losing the data forever, flush here.
-        Flush();
+    // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
+    // To avoid this disable and re-enable keeping the dependency stream.
+    // But we only have to do this if we have pending waits to be done.
+    auto& stream = cache->Stream(type);
+    const bool slice_counter = WaitPending() && stream.IsEnabled();
+    if (slice_counter) {
+        stream.Update(false);
     }
-    counter = std::move(counter_);
-    timestamp = timestamp_;
-}
-
-bool CachedQuery::WaitPending() const noexcept {
-    return counter && counter->WaitPending();
-}
 
-QueryType CachedQuery::GetType() const noexcept {
-    return type;
-}
+    VideoCommon::CachedQueryBase<HostCounter>::Flush();
 
-VAddr CachedQuery::GetCpuAddr() const noexcept {
-    return cpu_addr;
-}
-
-CacheAddr CachedQuery::GetCacheAddr() const noexcept {
-    return ToCacheAddr(host_ptr);
-}
-
-u64 CachedQuery::GetSizeInBytes() const noexcept {
-    return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
+    if (slice_counter) {
+        stream.Update(true);
+    }
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index d9f22b44d..20d337f15 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -13,6 +13,7 @@
 #include <glad/glad.h>
 
 #include "common/common_types.h"
+#include "video_core/query_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 
@@ -24,134 +25,57 @@ namespace OpenGL {
 
 class CachedQuery;
 class HostCounter;
-class RasterizerOpenGL;
 class QueryCache;
+class RasterizerOpenGL;
 
-class CounterStream final {
-public:
-    explicit CounterStream(QueryCache& cache, VideoCore::QueryType type);
-    ~CounterStream();
-
-    void Update(bool enabled, bool any_command_queued);
-
-    void Reset(bool any_command_queued);
-
-    std::shared_ptr<HostCounter> GetCurrent(bool any_command_queued);
-
-    bool IsEnabled() const {
-        return current != nullptr;
-    }
-
-private:
-    void Enable();
-
-    void Disable(bool any_command_queued);
-
-    void EndQuery(bool any_command_queued);
+using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
 
-    QueryCache& cache;
-
-    std::shared_ptr<HostCounter> current;
-    std::shared_ptr<HostCounter> last;
-    VideoCore::QueryType type;
-    GLenum target;
-};
-
-class QueryCache final {
+class QueryCache final
+    : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
 public:
     explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
     ~QueryCache();
 
-    void InvalidateRegion(CacheAddr addr, std::size_t size);
-
-    void FlushRegion(CacheAddr addr, std::size_t size);
-
-    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp);
-
-    void UpdateCounters();
-
-    void ResetCounter(VideoCore::QueryType type);
+    OGLQuery AllocateQuery(VideoCore::QueryType type);
 
     void Reserve(VideoCore::QueryType type, OGLQuery&& query);
 
-    std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency,
-                                                VideoCore::QueryType type);
+    bool AnyCommandQueued() const noexcept;
 
 private:
-    CachedQuery& Register(CachedQuery&& cached_query);
-
-    CachedQuery* TryGet(CacheAddr addr);
-
-    void Flush(CachedQuery& cached_query);
-
-    CounterStream& GetStream(VideoCore::QueryType type);
-
-    Core::System& system;
-    RasterizerOpenGL& rasterizer;
-
-    std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
-
-    std::array<CounterStream, VideoCore::NumQueryTypes> streams;
-    std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries;
+    RasterizerOpenGL& gl_rasterizer;
+    std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve;
 };
 
-class HostCounter final {
+class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
 public:
     explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
-                         VideoCore::QueryType type, OGLQuery&& query);
+                         VideoCore::QueryType type);
     ~HostCounter();
 
-    /// Returns the current value of the query.
-    u64 Query();
-
-    /// Returns true when querying this counter will potentially wait for OpenGL.
-    bool WaitPending() const noexcept;
+    void EndQuery();
 
 private:
+    u64 BlockingQuery() const override;
+
     QueryCache& cache;
     VideoCore::QueryType type;
-
-    std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one.
-    OGLQuery query;                          ///< OpenGL query.
-    std::optional<u64> result;               ///< Added values of the counter.
+    OGLQuery query;
 };
 
-class CachedQuery final {
+class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
 public:
-    explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr);
-    CachedQuery(CachedQuery&&) noexcept;
-    CachedQuery(const CachedQuery&) = delete;
-    ~CachedQuery();
-
-    CachedQuery& operator=(CachedQuery&&) noexcept;
-
-    /// Writes the counter value to host memory.
-    void Flush();
-
-    /// Updates the counter this cached query registered in guest memory will write when requested.
-    void SetCounter(std::shared_ptr<HostCounter> counter, std::optional<u64> timestamp);
+    explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
+                         u8* host_ptr);
+    CachedQuery(CachedQuery&& rhs) noexcept;
 
-    /// Returns true when a flushing this query will potentially wait for OpenGL.
-    bool WaitPending() const noexcept;
+    CachedQuery& operator=(CachedQuery&& rhs) noexcept;
 
-    /// Returns the query type.
-    VideoCore::QueryType GetType() const noexcept;
-
-    /// Returns the guest CPU address for this query.
-    VAddr GetCpuAddr() const noexcept;
-
-    /// Returns the cache address for this query.
-    CacheAddr GetCacheAddr() const noexcept;
-
-    /// Returns the number of cached bytes.
-    u64 GetSizeInBytes() const noexcept;
+    void Flush() override;
 
 private:
-    VideoCore::QueryType type;            ///< Abstracted query type (e.g. samples passed).
-    VAddr cpu_addr;                       ///< Guest CPU address.
-    u8* host_ptr;                         ///< Writable host pointer.
-    std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
-    std::optional<u64> timestamp;         ///< Timestamp to flush to guest memory.
+    QueryCache* cache;
+    VideoCore::QueryType type;
 };
 
 } // namespace OpenGL
author	ReinUsesLisp <reinuseslisp@airmail.cc>	2020-02-11 16:02:41 -0300
committer	ReinUsesLisp <reinuseslisp@airmail.cc>	2020-02-14 17:38:27 -0300
commit	c31382ced54c07650ae41fa2f75dc53da894784e (patch)
tree	aeff230f51000fc357bbc2859c5ef66892f9a841 /src/video_core/renderer_opengl
parent	73d2d3342dc8867d32f08f89b2ca36ff071598dc (diff)