video_core: Reimplement the buffer cache

Reimplement the buffer cache using cached bindings and page level granularity for modification tracking. This also drops the usage of shared pointers and virtual functions from the cache. - Bindings are cached, allowing to skip work when the game changes few bits between draws. - OpenGL Assembly shaders no longer copy when a region has been modified from the GPU to emulate constant buffers, instead GL_EXT_memory_object is used to alias sub-buffers within the same allocation. - OpenGL Assembly shaders stream constant buffer data using glProgramBufferParametersIuivNV, from NV_parameter_buffer_object. In theory this should save one hash table resolve inside the driver compared to glBufferSubData. - A new OpenGL stream buffer is implemented based on fences for drivers that are not Nvidia's proprietary, due to their low performance on partial glBufferSubData calls synchronized with 3D rendering (that some games use a lot). - Most optimizations are shared between APIs now, allowing Vulkan to cache more bindings than before, skipping unnecesarry work. This commit adds the necessary infrastructure to use Vulkan object from OpenGL. Overall, it improves performance and fixes some bugs present on the old cache. There are still some edge cases hit by some games that harm performance on some vendors, this are planned to be fixed in later commits.
author: ReinUsesLisp <reinuseslisp@airmail.cc> 2021-01-16 20:48:58 -0300
committer: ReinUsesLisp <reinuseslisp@airmail.cc> 2021-02-13 02:17:22 -0300
commit: 82c2601555b59a94d7160f2fd686cb63d32dd423 (patch)
tree: cd0ecd865945452fa589b572de614fc487f2f96a /src/video_core/texture_cache/texture_cache.h
parent: a39d9c5194a5fa230ee987ebfc73476f2011d6fa (diff)
1 files changed, 10 insertions, 18 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d1080300f..f336b705f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -103,9 +103,6 @@ public:
     /// Notify the cache that a new frame has been queued
     void TickFrame();
 
-    /// Return an unique mutually exclusive lock for the cache
-    [[nodiscard]] std::unique_lock<std::mutex> AcquireLock();
-
     /// Return a constant reference to the given image view id
     [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
 
@@ -179,6 +176,8 @@ public:
     /// Return true when a CPU region is modified from the GPU
     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
 
+    std::mutex mutex;
+
 private:
     /// Iterate over all page indices in a range
     template <typename Func>
@@ -212,8 +211,8 @@ private:
     void RefreshContents(Image& image);
 
     /// Upload data from guest to an image
-    template <typename MapBuffer>
-    void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset);
+    template <typename StagingBuffer>
+    void UploadImageContents(Image& image, StagingBuffer& staging_buffer, size_t buffer_offset);
 
     /// Find or create an image view from a guest descriptor
     [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
@@ -325,8 +324,6 @@ private:
 
     RenderTargets render_targets;
 
-    std::mutex mutex;
-
     std::unordered_map<TICEntry, ImageViewId> image_views;
     std::unordered_map<TSCEntry, SamplerId> samplers;
     std::unordered_map<RenderTargets, FramebufferId> framebuffers;
@@ -386,11 +383,6 @@ void TextureCache<P>::TickFrame() {
 }
 
 template <class P>
-std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
-    return std::unique_lock{mutex};
-}
-
-template <class P>
 const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
     return slot_image_views[id];
 }
@@ -598,11 +590,11 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
     });
     for (const ImageId image_id : images) {
         Image& image = slot_images[image_id];
-        auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes);
+        auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
         const auto copies = FullDownloadCopies(image.info);
         image.DownloadMemory(map, 0, copies);
         runtime.Finish();
-        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span());
+        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
     }
 }
 
@@ -757,7 +749,7 @@ void TextureCache<P>::PopAsyncFlushes() {
     for (const ImageId image_id : download_ids) {
         total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
     }
-    auto download_map = runtime.MapDownloadBuffer(total_size_bytes);
+    auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
     size_t buffer_offset = 0;
     for (const ImageId image_id : download_ids) {
         Image& image = slot_images[image_id];
@@ -769,7 +761,7 @@ void TextureCache<P>::PopAsyncFlushes() {
     runtime.Finish();
 
     buffer_offset = 0;
-    const std::span<u8> download_span = download_map.Span();
+    const std::span<u8> download_span = download_map.mapped_span;
     for (const ImageId image_id : download_ids) {
         const ImageBase& image = slot_images[image_id];
         const auto copies = FullDownloadCopies(image.info);
@@ -806,7 +798,7 @@ void TextureCache<P>::RefreshContents(Image& image) {
         LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
         return;
     }
-    auto map = runtime.MapUploadBuffer(MapSizeBytes(image));
+    auto map = runtime.UploadStagingBuffer(MapSizeBytes(image));
     UploadImageContents(image, map, 0);
     runtime.InsertUploadMemoryBarrier();
 }
@@ -814,7 +806,7 @@ void TextureCache<P>::RefreshContents(Image& image) {
 template <class P>
 template <typename MapBuffer>
 void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) {
-    const std::span<u8> mapped_span = map.Span().subspan(buffer_offset);
+    const std::span<u8> mapped_span = map.mapped_span.subspan(buffer_offset);
     const GPUVAddr gpu_addr = image.gpu_addr;
 
     if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
author	ReinUsesLisp <reinuseslisp@airmail.cc>	2021-01-16 20:48:58 -0300
committer	ReinUsesLisp <reinuseslisp@airmail.cc>	2021-02-13 02:17:22 -0300
commit	82c2601555b59a94d7160f2fd686cb63d32dd423 (patch)
tree	cd0ecd865945452fa589b572de614fc487f2f96a /src/video_core/texture_cache/texture_cache.h
parent	a39d9c5194a5fa230ee987ebfc73476f2011d6fa (diff)