From 1b4503c571d3b961efe74fa7e35d5fa14941ec09 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Apr 2019 16:35:54 -0300 Subject: texture_cache: Split texture cache into different files --- src/video_core/texture_cache/texture_cache.h | 282 +++++++++++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 src/video_core/texture_cache/texture_cache.h (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h new file mode 100644 index 000000000..fb43fa65e --- /dev/null +++ b/src/video_core/texture_cache/texture_cache.h @@ -0,0 +1,282 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "core/memory.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/surface_base.h" +#include "video_core/texture_cache/surface_params.h" +#include "video_core/texture_cache/surface_view.h" + +namespace Core { +class System; +} + +namespace Tegra::Texture { +struct FullTextureInfo; +} + +namespace VideoCore { +class RasterizerInterface; +} + +namespace VideoCommon { + +template +class TextureCache { + static_assert(std::is_trivially_copyable_v); + + using ResultType = std::tuple; + using IntervalMap = boost::icl::interval_map>>; + using IntervalType = typename IntervalMap::interval_type; + +public: + void InvalidateRegion(CacheAddr addr, std::size_t size) { + for (const auto& surface : GetSurfacesInRegion(addr, size)) { + if (!surface->IsRegistered()) { + // Skip duplicates + continue; + } + 
Unregister(surface); + } + } + + ResultType GetTextureSurface(TExecutionContext exctx, + const Tegra::Texture::FullTextureInfo& config) { + const auto gpu_addr{config.tic.Address()}; + if (!gpu_addr) { + return {{}, exctx}; + } + const auto params{SurfaceParams::CreateForTexture(system, config)}; + return GetSurfaceView(exctx, gpu_addr, params, true); + } + + ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { + const auto& regs{system.GPU().Maxwell3D().regs}; + const auto gpu_addr{regs.zeta.Address()}; + if (!gpu_addr || !regs.zeta_enable) { + return {{}, exctx}; + } + const auto depth_params{SurfaceParams::CreateForDepthBuffer( + system, regs.zeta_width, regs.zeta_height, regs.zeta.format, + regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, + regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; + return GetSurfaceView(exctx, gpu_addr, depth_params, preserve_contents); + } + + ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, + bool preserve_contents) { + ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); + + const auto& regs{system.GPU().Maxwell3D().regs}; + if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || + regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { + return {{}, exctx}; + } + + auto& memory_manager{system.GPU().MemoryManager()}; + const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; + const auto gpu_addr{config.Address() + + config.base_layer * config.layer_stride * sizeof(u32)}; + if (!gpu_addr) { + return {{}, exctx}; + } + + return GetSurfaceView(exctx, gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + preserve_contents); + } + + ResultType GetFermiSurface(TExecutionContext exctx, + const Tegra::Engines::Fermi2D::Regs::Surface& config) { + return GetSurfaceView(exctx, config.Address(), + SurfaceParams::CreateForFermiCopySurface(config), true); + } + + std::shared_ptr 
TryFindFramebufferSurface(const u8* host_ptr) const { + const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; + return it != registered_surfaces.end() ? *it->second.begin() : nullptr; + } + + u64 Tick() { + return ++ticks; + } + +protected: + TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : system{system}, rasterizer{rasterizer} {} + + ~TextureCache() = default; + + virtual ResultType TryFastGetSurfaceView( + TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector>& overlaps) = 0; + + virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; + + void Register(std::shared_ptr surface, GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr) { + surface->Register(gpu_addr, cpu_addr, host_ptr); + registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); + rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); + } + + void Unregister(std::shared_ptr surface) { + registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); + rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); + surface->Unregister(); + } + + std::shared_ptr GetUncachedSurface(const SurfaceParams& params) { + if (const auto surface = TryGetReservedSurface(params); surface) + return surface; + // No reserved surface available, create a new one and reserve it + auto new_surface{CreateSurface(params)}; + ReserveSurface(params, new_surface); + return new_surface; + } + + Core::System& system; + +private: + ResultType GetSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, + const SurfaceParams& params, bool preserve_contents) { + auto& memory_manager{system.GPU().MemoryManager()}; + const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; + DEBUG_ASSERT(cpu_addr); + + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + const auto 
cache_addr{ToCacheAddr(host_ptr)}; + auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; + if (overlaps.empty()) { + return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + } + + if (overlaps.size() == 1) { + if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { + return {view, exctx}; + } + } + + TView* fast_view; + std::tie(fast_view, exctx) = TryFastGetSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, + params, preserve_contents, overlaps); + + if (!fast_view) { + std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { + return lhs->GetModificationTick() < rhs->GetModificationTick(); + }); + } + + for (const auto& surface : overlaps) { + if (!fast_view) { + // Flush even when we don't care about the contents, to preserve memory not + // written by the new surface. + exctx = FlushSurface(exctx, surface); + } + Unregister(surface); + } + + if (fast_view) { + return {fast_view, exctx}; + } + + return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + } + + ResultType LoadSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr, const SurfaceParams& params, bool preserve_contents) { + const auto new_surface{GetUncachedSurface(params)}; + Register(new_surface, gpu_addr, cpu_addr, host_ptr); + if (preserve_contents) { + exctx = LoadSurface(exctx, new_surface); + } + return {new_surface->GetView(gpu_addr, params), exctx}; + } + + TExecutionContext LoadSurface(TExecutionContext exctx, + const std::shared_ptr& surface) { + surface->LoadBuffer(); + exctx = surface->UploadTexture(exctx); + surface->MarkAsModified(false); + return exctx; + } + + TExecutionContext FlushSurface(TExecutionContext exctx, + const std::shared_ptr& surface) { + if (!surface->IsModified()) { + return exctx; + } + exctx = surface->DownloadTexture(exctx); + surface->FlushBuffer(); + return exctx; + } + + std::vector> 
GetSurfacesInRegion(CacheAddr cache_addr, + std::size_t size) const { + if (size == 0) { + return {}; + } + const IntervalType interval{cache_addr, cache_addr + size}; + + std::vector> surfaces; + for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { + surfaces.push_back(*pair.second.begin()); + } + return surfaces; + } + + void ReserveSurface(const SurfaceParams& params, std::shared_ptr surface) { + surface_reserve[params].push_back(std::move(surface)); + } + + std::shared_ptr TryGetReservedSurface(const SurfaceParams& params) { + auto search{surface_reserve.find(params)}; + if (search == surface_reserve.end()) { + return {}; + } + for (auto& surface : search->second) { + if (!surface->IsRegistered()) { + return surface; + } + } + return {}; + } + + IntervalType GetSurfaceInterval(std::shared_ptr surface) const { + return IntervalType::right_open(surface->GetCacheAddr(), + surface->GetCacheAddr() + surface->GetSizeInBytes()); + } + + VideoCore::RasterizerInterface& rasterizer; + + u64 ticks{}; + + IntervalMap registered_surfaces; + + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have + /// previously been used. This is to prevent surfaces from being constantly created and + /// destroyed when used with different surface parameters. + std::unordered_map>> surface_reserve; +}; + +} // namespace VideoCommon -- cgit v1.2.3 From 6c410104f4f6953ac37095aa5e65804bf115c026 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 25 Apr 2019 13:41:57 -0300 Subject: texture_cache: Remove execution context copies from the texture cache This is done to simplify the OpenGL implementation, it is needed for Vulkan. 
--- src/video_core/texture_cache/texture_cache.h | 83 ++++++++++++---------------- 1 file changed, 34 insertions(+), 49 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fb43fa65e..c5c01957a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -41,11 +40,8 @@ class RasterizerInterface; namespace VideoCommon { -template +template class TextureCache { - static_assert(std::is_trivially_copyable_v); - - using ResultType = std::tuple; using IntervalMap = boost::icl::interval_map>>; using IntervalType = typename IntervalMap::interval_type; @@ -60,37 +56,35 @@ public: } } - ResultType GetTextureSurface(TExecutionContext exctx, - const Tegra::Texture::FullTextureInfo& config) { + TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { - return {{}, exctx}; + return {}; } const auto params{SurfaceParams::CreateForTexture(system, config)}; - return GetSurfaceView(exctx, gpu_addr, params, true); + return GetSurfaceView(gpu_addr, params, true); } - ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { + TView* GetDepthBufferSurface(bool preserve_contents) { const auto& regs{system.GPU().Maxwell3D().regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { - return {{}, exctx}; + return {}; } const auto depth_params{SurfaceParams::CreateForDepthBuffer( system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - return GetSurfaceView(exctx, gpu_addr, depth_params, preserve_contents); + return GetSurfaceView(gpu_addr, depth_params, 
preserve_contents); } - ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, - bool preserve_contents) { + TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); const auto& regs{system.GPU().Maxwell3D().regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - return {{}, exctx}; + return {}; } auto& memory_manager{system.GPU().MemoryManager()}; @@ -98,17 +92,16 @@ public: const auto gpu_addr{config.Address() + config.base_layer * config.layer_stride * sizeof(u32)}; if (!gpu_addr) { - return {{}, exctx}; + return {}; } - return GetSurfaceView(exctx, gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + return GetSurfaceView(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); } - ResultType GetFermiSurface(TExecutionContext exctx, - const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return GetSurfaceView(exctx, config.Address(), - SurfaceParams::CreateForFermiCopySurface(config), true); + TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + return GetSurfaceView(config.Address(), SurfaceParams::CreateForFermiCopySurface(config), + true); } std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { @@ -126,10 +119,9 @@ protected: ~TextureCache() = default; - virtual ResultType TryFastGetSurfaceView( - TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents, - const std::vector>& overlaps) = 0; + virtual TView* TryFastGetSurfaceView( + GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, + bool preserve_contents, const std::vector>& overlaps) = 0; virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; @@ -158,8 +150,7 @@ protected: Core::System& system; private: - ResultType 
GetSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, - const SurfaceParams& params, bool preserve_contents) { + TView* GetSurfaceView(GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { auto& memory_manager{system.GPU().MemoryManager()}; const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; DEBUG_ASSERT(cpu_addr); @@ -168,18 +159,17 @@ private: const auto cache_addr{ToCacheAddr(host_ptr)}; auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; if (overlaps.empty()) { - return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } if (overlaps.size() == 1) { if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { - return {view, exctx}; + return view; } } - TView* fast_view; - std::tie(fast_view, exctx) = TryFastGetSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, - params, preserve_contents, overlaps); + const auto fast_view{TryFastGetSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, + preserve_contents, overlaps)}; if (!fast_view) { std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { @@ -191,44 +181,39 @@ private: if (!fast_view) { // Flush even when we don't care about the contents, to preserve memory not // written by the new surface. 
- exctx = FlushSurface(exctx, surface); + FlushSurface(surface); } Unregister(surface); } - if (fast_view) { - return {fast_view, exctx}; + return fast_view; } - return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } - ResultType LoadSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr, const SurfaceParams& params, bool preserve_contents) { + TView* LoadSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents) { const auto new_surface{GetUncachedSurface(params)}; Register(new_surface, gpu_addr, cpu_addr, host_ptr); if (preserve_contents) { - exctx = LoadSurface(exctx, new_surface); + LoadSurface(new_surface); } - return {new_surface->GetView(gpu_addr, params), exctx}; + return new_surface->GetView(gpu_addr, params); } - TExecutionContext LoadSurface(TExecutionContext exctx, - const std::shared_ptr& surface) { + void LoadSurface(const std::shared_ptr& surface) { surface->LoadBuffer(); - exctx = surface->UploadTexture(exctx); + surface->UploadTexture(); surface->MarkAsModified(false); - return exctx; } - TExecutionContext FlushSurface(TExecutionContext exctx, - const std::shared_ptr& surface) { + void FlushSurface(const std::shared_ptr& surface) { if (!surface->IsModified()) { - return exctx; + return; } - exctx = surface->DownloadTexture(exctx); + surface->DownloadTexture(); surface->FlushBuffer(); - return exctx; } std::vector> GetSurfacesInRegion(CacheAddr cache_addr, -- cgit v1.2.3 From bc930754cc9437ddd86e7d246b3eb4302540896a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 10:57:16 -0400 Subject: Implement Texture Cache V2 --- src/video_core/texture_cache/texture_cache.h | 454 +++++++++++++++++++++------ 1 file changed, 366 insertions(+), 88 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git 
a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c5c01957a..eb0d9bc10 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -22,6 +22,7 @@ #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" +#include "video_core/texture_cache/copy_params.h" #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" @@ -40,32 +41,42 @@ class RasterizerInterface; namespace VideoCommon { +using VideoCore::Surface::SurfaceTarget; +using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; + template class TextureCache { - using IntervalMap = boost::icl::interval_map>>; + using IntervalMap = boost::icl::interval_map>; using IntervalType = typename IntervalMap::interval_type; public: + void InitMemoryMananger(Tegra::MemoryManager& memory_manager) { + this->memory_manager = &memory_manager; + } + void InvalidateRegion(CacheAddr addr, std::size_t size) { for (const auto& surface : GetSurfacesInRegion(addr, size)) { - if (!surface->IsRegistered()) { - // Skip duplicates - continue; - } Unregister(surface); } } - TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { + void InvalidateRegionEx(GPUVAddr addr, std::size_t size) { + for (const auto& surface : GetSurfacesInRegionInner(addr, size)) { + Unregister(surface); + } + } + + TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, + const VideoCommon::Shader::Sampler& entry) { const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { return {}; } - const auto params{SurfaceParams::CreateForTexture(system, config)}; - return GetSurfaceView(gpu_addr, params, true); + const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; + return GetSurface(gpu_addr, params, true).second; } - TView* 
GetDepthBufferSurface(bool preserve_contents) { + TView GetDepthBufferSurface(bool preserve_contents) { const auto& regs{system.GPU().Maxwell3D().regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { @@ -75,36 +86,75 @@ public: system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - return GetSurfaceView(gpu_addr, depth_params, preserve_contents); + auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); + if (depth_buffer.target) + depth_buffer.target->MarkAsProtected(false); + if (depth_buffer.target) + depth_buffer.target->MarkAsProtected(true); + return surface_view.second; } - TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { + TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); const auto& regs{system.GPU().Maxwell3D().regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { + SetEmptyColorBuffer(index); return {}; } - auto& memory_manager{system.GPU().MemoryManager()}; - const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; - const auto gpu_addr{config.Address() + - config.base_layer * config.layer_stride * sizeof(u32)}; + const auto& config{regs.rt[index]}; + const auto gpu_addr{config.Address()}; if (!gpu_addr) { + SetEmptyColorBuffer(index); return {}; } - return GetSurfaceView(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), - preserve_contents); + auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + preserve_contents); + if (render_targets[index].target) + render_targets[index].target->MarkAsProtected(false); + render_targets[index].target = surface_view.first; + if (render_targets[index].target) + 
render_targets[index].target->MarkAsProtected(true); + return surface_view.second; + } + + void MarkColorBufferInUse(std::size_t index) { + if (render_targets[index].target) + render_targets[index].target->MarkAsModified(true, Tick()); } - TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return GetSurfaceView(config.Address(), SurfaceParams::CreateForFermiCopySurface(config), - true); + void MarkDepthBufferInUse() { + if (depth_buffer.target) + depth_buffer.target->MarkAsModified(true, Tick()); } - std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { + void SetEmptyDepthBuffer() { + if (depth_buffer.target != nullptr) { + depth_buffer.target->MarkAsProtected(false); + depth_buffer.target = nullptr; + depth_buffer.view = nullptr; + } + } + + void SetEmptyColorBuffer(std::size_t index) { + if (render_targets[index].target != nullptr) { + render_targets[index].target->MarkAsProtected(false); + std::memset(&render_targets[index].config, 0, sizeof(RenderTargetConfig)); + render_targets[index].target = nullptr; + render_targets[index].view = nullptr; + } + } + + TView GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); + const GPUVAddr gpu_addr = config.Address(); + return GetSurface(gpu_addr, params, true).second; + } + + TSurface TryFindFramebufferSurface(const u8* host_ptr) const { const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; return it != registered_surfaces.end() ? 
*it->second.begin() : nullptr; } @@ -115,126 +165,334 @@ public: protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : system{system}, rasterizer{rasterizer} {} + : system{system}, rasterizer{rasterizer} { + for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { + SetEmptyColorBuffer(i); + } + SetEmptyDepthBuffer(); + } ~TextureCache() = default; - virtual TView* TryFastGetSurfaceView( - GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, const std::vector>& overlaps) = 0; + virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; - virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; + virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, + const CopyParams& copy_params) = 0; - void Register(std::shared_ptr surface, GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr) { - surface->Register(gpu_addr, cpu_addr, host_ptr); - registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); - rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); + void Register(TSurface surface) { + const GPUVAddr gpu_addr = surface->GetGpuAddr(); + u8* host_ptr = memory_manager->GetPointer(gpu_addr); + const std::size_t size = surface->GetSizeInBytes(); + const std::optional cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); + if (!host_ptr || !cpu_addr) { + LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", + gpu_addr); + return; + } + surface->SetHostPtr(host_ptr); + surface->SetCpuAddr(*cpu_addr); + registered_surfaces.add({GetInterval(host_ptr, size), {surface}}); + rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); + RegisterInnerCache(surface); + surface->MarkAsRegistered(true); } - void Unregister(std::shared_ptr surface) { - registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); - 
rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); - surface->Unregister(); + void Unregister(TSurface surface) { + if (surface->IsProtected()) + return; + const GPUVAddr gpu_addr = surface->GetGpuAddr(); + const void* host_ptr = surface->GetHostPtr(); + const std::size_t size = surface->GetSizeInBytes(); + const VAddr cpu_addr = surface->GetCpuAddr(); + registered_surfaces.erase(GetInterval(host_ptr, size)); + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + UnregisterInnerCache(surface); + surface->MarkAsRegistered(false); + ReserveSurface(surface->GetSurfaceParams(), surface); } - std::shared_ptr GetUncachedSurface(const SurfaceParams& params) { - if (const auto surface = TryGetReservedSurface(params); surface) + TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { + if (const auto surface = TryGetReservedSurface(params); surface) { + surface->SetGpuAddr(gpu_addr); return surface; + } // No reserved surface available, create a new one and reserve it - auto new_surface{CreateSurface(params)}; - ReserveSurface(params, new_surface); + auto new_surface{CreateSurface(gpu_addr, params)}; return new_surface; } Core::System& system; private: - TView* GetSurfaceView(GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { - auto& memory_manager{system.GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; - DEBUG_ASSERT(cpu_addr); - - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; - if (overlaps.empty()) { - return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + enum class RecycleStrategy : u32 { + Ignore = 0, + Flush = 1, + BufferCopy = 3, + }; + + RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, + const GPUVAddr gpu_addr, const bool untopological) 
{ + // Untopological decision + if (untopological) { + return RecycleStrategy::Ignore; + } + // 3D Textures decision + if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { + return RecycleStrategy::Flush; } + for (auto s : overlaps) { + const auto& s_params = s->GetSurfaceParams(); + if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { + return RecycleStrategy::Flush; + } + } + return RecycleStrategy::Ignore; + } - if (overlaps.size() == 1) { - if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { - return view; + std::pair RecycleSurface(std::vector& overlaps, + const SurfaceParams& params, const GPUVAddr gpu_addr, + const u8* host_ptr, const bool preserve_contents, + const bool untopological) { + for (auto surface : overlaps) { + Unregister(surface); + } + RecycleStrategy strategy = !Settings::values.use_accurate_gpu_emulation + ? PickStrategy(overlaps, params, gpu_addr, untopological) + : RecycleStrategy::Flush; + switch (strategy) { + case RecycleStrategy::Ignore: { + return InitializeSurface(gpu_addr, params, preserve_contents); + } + case RecycleStrategy::Flush: { + std::sort(overlaps.begin(), overlaps.end(), + [](const TSurface& a, const TSurface& b) -> bool { + return a->GetModificationTick() < b->GetModificationTick(); + }); + for (auto surface : overlaps) { + FlushSurface(surface); } + return InitializeSurface(gpu_addr, params, preserve_contents); } + default: { + UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); + return InitializeSurface(gpu_addr, params, preserve_contents); + } + } + } - const auto fast_view{TryFastGetSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, - preserve_contents, overlaps)}; + std::pair RebuildMirage(TSurface current_surface, + const SurfaceParams& params) { + const auto gpu_addr = current_surface->GetGpuAddr(); + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + std::vector bricks = current_surface->BreakDown(); + for (auto& brick 
: bricks) { + ImageCopy(current_surface, new_surface, brick); + } + Unregister(current_surface); + Register(new_surface); + return {new_surface, new_surface->GetMainView()}; + } - if (!fast_view) { - std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { - return lhs->GetModificationTick() < rhs->GetModificationTick(); - }); + std::pair ManageStructuralMatch(TSurface current_surface, + const SurfaceParams& params) { + const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + if (is_mirage) { + return RebuildMirage(current_surface, params); } + const bool matches_target = current_surface->MatchTarget(params.target); + if (matches_target) { + return {current_surface, current_surface->GetMainView()}; + } + return {current_surface, current_surface->EmplaceOverview(params)}; + } - for (const auto& surface : overlaps) { - if (!fast_view) { - // Flush even when we don't care about the contents, to preserve memory not - // written by the new surface. 
- FlushSurface(surface); + std::optional> ReconstructSurface(std::vector& overlaps, + const SurfaceParams& params, + const GPUVAddr gpu_addr, + const u8* host_ptr) { + if (!params.is_layered || params.target == SurfaceTarget::Texture3D) { + return {}; + } + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + for (auto surface : overlaps) { + const SurfaceParams& src_params = surface->GetSurfaceParams(); + if (src_params.is_layered || src_params.num_levels > 1) { + // We send this cases to recycle as they are more complex to handle + return {}; + } + const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); + auto mipmap_layer = new_surface->GetLayerMipmap(surface->GetGpuAddr()); + if (!mipmap_layer) { + return {}; } + const u32 layer = (*mipmap_layer).first; + const u32 mipmap = (*mipmap_layer).second; + if (new_surface->GetMipmapSize(mipmap) != candidate_size) { + return {}; + } + // Now we got all the data set up + CopyParams copy_params{}; + const u32 dst_width = params.GetMipWidth(mipmap); + const u32 dst_height = params.GetMipHeight(mipmap); + copy_params.width = std::min(src_params.width, dst_width); + copy_params.height = std::min(src_params.height, dst_height); + copy_params.depth = 1; + copy_params.source_level = 0; + copy_params.dest_level = mipmap; + copy_params.source_z = 0; + copy_params.dest_z = layer; + ImageCopy(surface, new_surface, copy_params); + } + for (auto surface : overlaps) { Unregister(surface); } - if (fast_view) { - return fast_view; + Register(new_surface); + return {{new_surface, new_surface->GetMainView()}}; + } + + std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, + bool preserve_contents) { + + const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; + const auto cache_addr{ToCacheAddr(host_ptr)}; + const std::size_t candidate_size = params.GetGuestSizeInBytes(); + auto overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)}; + if (overlaps.empty()) { + return 
InitializeSurface(gpu_addr, params, preserve_contents); + } + + for (auto surface : overlaps) { + if (!surface->MatchesTopology(params)) { + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + true); + } } - return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + if (overlaps.size() == 1) { + TSurface current_surface = overlaps[0]; + if (current_surface->MatchesStructure(params) && + current_surface->GetGpuAddr() == gpu_addr && + (params.target != SurfaceTarget::Texture3D || + current_surface->MatchTarget(params.target))) { + return ManageStructuralMatch(current_surface, params); + } + if (current_surface->GetSizeInBytes() <= candidate_size) { + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + false); + } + std::optional view = current_surface->EmplaceView(params, gpu_addr); + if (view.has_value()) { + const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + if (is_mirage) { + LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + false); + } + return {current_surface, *view}; + } + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + } else { + std::optional> view = + ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + if (view.has_value()) { + return *view; + } + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + } } - TView* LoadSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents) { - const auto new_surface{GetUncachedSurface(params)}; - Register(new_surface, gpu_addr, cpu_addr, host_ptr); + std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, + bool preserve_contents) { + auto new_surface{GetUncachedSurface(gpu_addr, params)}; + Register(new_surface); if (preserve_contents) { LoadSurface(new_surface); } - return 
new_surface->GetView(gpu_addr, params); + return {new_surface, new_surface->GetMainView()}; } - void LoadSurface(const std::shared_ptr& surface) { - surface->LoadBuffer(); - surface->UploadTexture(); - surface->MarkAsModified(false); + void LoadSurface(const TSurface& surface) { + staging_buffer.resize(surface->GetHostSizeInBytes()); + surface->LoadBuffer(*memory_manager, staging_buffer); + surface->UploadTexture(staging_buffer); + surface->MarkAsModified(false, Tick()); } - void FlushSurface(const std::shared_ptr& surface) { + void FlushSurface(const TSurface& surface) { if (!surface->IsModified()) { return; } - surface->DownloadTexture(); - surface->FlushBuffer(); + staging_buffer.resize(surface->GetHostSizeInBytes()); + surface->DownloadTexture(staging_buffer); + surface->FlushBuffer(staging_buffer); + surface->MarkAsModified(false, Tick()); } - std::vector> GetSurfacesInRegion(CacheAddr cache_addr, - std::size_t size) const { + std::vector GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { if (size == 0) { return {}; } const IntervalType interval{cache_addr, cache_addr + size}; - std::vector> surfaces; + std::vector surfaces; for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { - surfaces.push_back(*pair.second.begin()); + for (auto& s : pair.second) { + if (!s || !s->IsRegistered()) { + continue; + } + surfaces.push_back(s); + } } return surfaces; } - void ReserveSurface(const SurfaceParams& params, std::shared_ptr surface) { + void RegisterInnerCache(TSurface& surface) { + GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; + const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + while (start <= end) { + inner_cache[start].push_back(surface); + start++; + } + } + + void UnregisterInnerCache(TSurface& surface) { + GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; + const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + while 
(start <= end) { + inner_cache[start].remove(surface); + start++; + } + } + + std::vector GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) { + if (size == 0) { + return {}; + } + const GPUVAddr gpu_addr_end = gpu_addr + size; + GPUVAddr start = gpu_addr >> inner_cache_page_bits; + const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits; + std::vector surfaces; + while (start <= end) { + std::list& list = inner_cache[start]; + for (auto& s : list) { + if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) { + s->MarkAsPicked(true); + surfaces.push_back(s); + } + } + start++; + } + for (auto& s : surfaces) { + s->MarkAsPicked(false); + } + return surfaces; + } + + void ReserveSurface(const SurfaceParams& params, TSurface surface) { surface_reserve[params].push_back(std::move(surface)); } - std::shared_ptr TryGetReservedSurface(const SurfaceParams& params) { + TSurface TryGetReservedSurface(const SurfaceParams& params) { auto search{surface_reserve.find(params)}; if (search == surface_reserve.end()) { return {}; @@ -247,21 +505,41 @@ private: return {}; } - IntervalType GetSurfaceInterval(std::shared_ptr surface) const { - return IntervalType::right_open(surface->GetCacheAddr(), - surface->GetCacheAddr() + surface->GetSizeInBytes()); + IntervalType GetInterval(const void* host_ptr, const std::size_t size) const { + const CacheAddr addr = ToCacheAddr(host_ptr); + return IntervalType::right_open(addr, addr + size); } + struct RenderInfo { + RenderTargetConfig config; + TSurface target; + TView view; + }; + + struct DepthBufferInfo { + TSurface target; + TView view; + }; + VideoCore::RasterizerInterface& rasterizer; + Tegra::MemoryManager* memory_manager; u64 ticks{}; IntervalMap registered_surfaces; + static constexpr u64 inner_cache_page_bits{20}; + static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits}; + std::unordered_map> inner_cache; + /// The surface reserve is a "backup" cache, this is where we put unique 
surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. - std::unordered_map>> surface_reserve; + std::unordered_map> surface_reserve; + std::array render_targets; + DepthBufferInfo depth_buffer; + + std::vector staging_buffer; }; } // namespace VideoCommon -- cgit v1.2.3 From b711cdce782ee604edc3c52628eb76e6b9a08b72 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 13:58:37 -0400 Subject: Corrections to Structural Matching The texture will now be reconstructed if the width only matches on GoB alignment. --- src/video_core/texture_cache/texture_cache.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index eb0d9bc10..f3b28453a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -284,7 +284,7 @@ private: const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); - std::vector bricks = current_surface->BreakDown(); + std::vector bricks = current_surface->BreakDown(params); for (auto& brick : bricks) { ImageCopy(current_surface, new_surface, brick); } @@ -370,11 +370,16 @@ private: if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; - if (current_surface->MatchesStructure(params) && + MatchStructureResult s_result = current_surface->MatchesStructure(params); + if (s_result != MatchStructureResult::None && current_surface->GetGpuAddr() == gpu_addr && (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { - return ManageStructuralMatch(current_surface, params); + if (s_result == MatchStructureResult::FullMatch) { + return ManageStructuralMatch(current_surface, 
params); + } else { + return RebuildMirage(current_surface, params); + } } if (current_surface->GetSizeInBytes() <= candidate_size) { return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, -- cgit v1.2.3 From d86f9cd70910d4b96ec301e7d532b11d18a290a4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 17:30:36 -0400 Subject: Change texture_cache caching from GPUAddr to CacheAddr This also reverses the changes to make invalidation and flushing through the GPU address. --- src/video_core/texture_cache/texture_cache.h | 102 +++++++++++---------------- 1 file changed, 41 insertions(+), 61 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f3b28453a..43aaec011 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -60,12 +60,6 @@ public: } } - void InvalidateRegionEx(GPUVAddr addr, std::size_t size) { - for (const auto& surface : GetSurfacesInRegionInner(addr, size)) { - Unregister(surface); - } - } - TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { const auto gpu_addr{config.tic.Address()}; @@ -154,9 +148,19 @@ public: return GetSurface(gpu_addr, params, true).second; } - TSurface TryFindFramebufferSurface(const u8* host_ptr) const { - const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; - return it != registered_surfaces.end() ? 
*it->second.begin() : nullptr; + TSurface TryFindFramebufferSurface(const u8* host_ptr) { + const CacheAddr cache_addr = ToCacheAddr(host_ptr); + if (!cache_addr) { + return nullptr; + } + const CacheAddr page = cache_addr >> registry_page_bits; + std::list& list = registry[page]; + for (auto& s : list) { + if (s->GetCacheAddr() == cache_addr) { + return s; + } + } + return nullptr; } u64 Tick() { @@ -181,30 +185,28 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); - u8* host_ptr = memory_manager->GetPointer(gpu_addr); + const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); const std::optional cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); - if (!host_ptr || !cpu_addr) { + if (!cache_ptr || !cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); return; } - surface->SetHostPtr(host_ptr); + surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); - registered_surfaces.add({GetInterval(host_ptr, size), {surface}}); - rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); RegisterInnerCache(surface); surface->MarkAsRegistered(true); + rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } void Unregister(TSurface surface) { if (surface->IsProtected()) return; const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const void* host_ptr = surface->GetHostPtr(); + const CacheAddr cache_ptr = surface->GetCacheAddr(); const std::size_t size = surface->GetSizeInBytes(); const VAddr cpu_addr = surface->GetCpuAddr(); - registered_surfaces.erase(GetInterval(host_ptr, size)); rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); UnregisterInnerCache(surface); surface->MarkAsRegistered(false); @@ -280,7 +282,7 @@ private: } } - std::pair RebuildMirage(TSurface current_surface, + std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params) { const auto gpu_addr = 
current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); @@ -297,7 +299,7 @@ private: const SurfaceParams& params) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { - return RebuildMirage(current_surface, params); + return RebuildSurface(current_surface, params); } const bool matches_target = current_surface->MatchTarget(params.target); if (matches_target) { @@ -356,7 +358,7 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)}; + auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -378,7 +380,7 @@ private: if (s_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params); } else { - return RebuildMirage(current_surface, params); + return RebuildSurface(current_surface, params); } } if (current_surface->GetSizeInBytes() <= candidate_size) { @@ -429,58 +431,40 @@ private: } staging_buffer.resize(surface->GetHostSizeInBytes()); surface->DownloadTexture(staging_buffer); - surface->FlushBuffer(staging_buffer); + surface->FlushBuffer(*memory_manager, staging_buffer); surface->MarkAsModified(false, Tick()); } - std::vector GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { - if (size == 0) { - return {}; - } - const IntervalType interval{cache_addr, cache_addr + size}; - - std::vector surfaces; - for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { - for (auto& s : pair.second) { - if (!s || !s->IsRegistered()) { - continue; - } - surfaces.push_back(s); - } - } - return surfaces; - } - void RegisterInnerCache(TSurface& surface) { - GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; - const 
GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - inner_cache[start].push_back(surface); + registry[start].push_back(surface); start++; } } void UnregisterInnerCache(TSurface& surface) { - GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; - const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - inner_cache[start].remove(surface); + registry[start].remove(surface); start++; } } - std::vector GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) { + std::vector GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { if (size == 0) { return {}; } - const GPUVAddr gpu_addr_end = gpu_addr + size; - GPUVAddr start = gpu_addr >> inner_cache_page_bits; - const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits; + const CacheAddr cache_addr_end = cache_addr + size; + CacheAddr start = cache_addr >> registry_page_bits; + const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; std::vector surfaces; while (start <= end) { - std::list& list = inner_cache[start]; + std::list& list = registry[start]; for (auto& s : list) { - if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) { + if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { s->MarkAsPicked(true); surfaces.push_back(s); } @@ -510,11 +494,6 @@ private: return {}; } - IntervalType GetInterval(const void* host_ptr, const std::size_t size) const { - const CacheAddr addr = ToCacheAddr(host_ptr); - return IntervalType::right_open(addr, addr + size); - } - struct RenderInfo { RenderTargetConfig config; TSurface target; @@ -531,11 +510,12 @@ private: u64 
ticks{}; - IntervalMap registered_surfaces; - - static constexpr u64 inner_cache_page_bits{20}; - static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits}; - std::unordered_map> inner_cache; + // The internal Cache is different for the Texture Cache. It's based on buckets + // of 1MB. This fits better for the purpose of this cache as textures are normaly + // large in size. + static constexpr u64 registry_page_bits{20}; + static constexpr u64 registry_page_size{1 << registry_page_bits}; + std::unordered_map> registry; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and -- cgit v1.2.3 From 03d10ea3b420c923c14a11c86b47e2f00bc30e00 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 21:28:31 -0300 Subject: copy_params: Use constructor instead of C-like initialization --- src/video_core/texture_cache/texture_cache.h | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 43aaec011..c9a648bbd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -283,7 +283,7 @@ private: } std::pair RebuildSurface(TSurface current_surface, - const SurfaceParams& params) { + const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); std::vector bricks = current_surface->BreakDown(params); @@ -323,26 +323,21 @@ private: return {}; } const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); - auto mipmap_layer = new_surface->GetLayerMipmap(surface->GetGpuAddr()); + auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { return {}; } - const u32 layer = 
(*mipmap_layer).first; - const u32 mipmap = (*mipmap_layer).second; + const u32 layer{mipmap_layer->first}; + const u32 mipmap{mipmap_layer->second}; if (new_surface->GetMipmapSize(mipmap) != candidate_size) { return {}; } // Now we got all the data set up - CopyParams copy_params{}; - const u32 dst_width = params.GetMipWidth(mipmap); - const u32 dst_height = params.GetMipHeight(mipmap); - copy_params.width = std::min(src_params.width, dst_width); - copy_params.height = std::min(src_params.height, dst_height); - copy_params.depth = 1; - copy_params.source_level = 0; - copy_params.dest_level = mipmap; - copy_params.source_z = 0; - copy_params.dest_z = layer; + const u32 dst_width{params.GetMipWidth(mipmap)}; + const u32 dst_height{params.GetMipHeight(mipmap)}; + const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, + std::min(src_params.width, dst_width), + std::min(src_params.height, dst_height), 1); ImageCopy(surface, new_surface, copy_params); } for (auto surface : overlaps) { -- cgit v1.2.3 From 324e470879e63423844a687f7d675a0536006f07 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 23:13:05 -0400 Subject: Texture Cache: Implement Blitting and Fermi Copies --- src/video_core/texture_cache/texture_cache.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c9a648bbd..bb5a50ab9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -15,6 +15,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/math_util.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ -142,10 +143,11 @@ public: } } - TView GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - SurfaceParams params = 
SurfaceParams::CreateForFermiCopySurface(config); - const GPUVAddr gpu_addr = config.Address(); - return GetSurface(gpu_addr, params, true).second; + void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, + const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, + const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) { + ImageBlit(GetFermiSurface(src_config), GetFermiSurface(dst_config), src_rect, dst_rect); } TSurface TryFindFramebufferSurface(const u8* host_ptr) { @@ -183,6 +185,9 @@ protected: virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, const CopyParams& copy_params) = 0; + virtual void ImageBlit(TSurface src, TSurface dst, const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) = 0; + void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); @@ -223,6 +228,12 @@ protected: return new_surface; } + TSurface GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); + const GPUVAddr gpu_addr = config.Address(); + return GetSurface(gpu_addr, params, true).first; + } + Core::System& system; private: -- cgit v1.2.3 From de0b1cb2b2199bd8efff78938d385fa74652cdfb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 07:09:02 -0400 Subject: Fixes to mipmap's process and reconstruct process --- src/video_core/texture_cache/texture_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index bb5a50ab9..554b9a228 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -323,7 +323,7 @@ private: const SurfaceParams& params, const GPUVAddr gpu_addr, const u8* host_ptr) { - 
if (!params.is_layered || params.target == SurfaceTarget::Texture3D) { + if (params.target == SurfaceTarget::Texture3D) { return {}; } TSurface new_surface = GetUncachedSurface(gpu_addr, params); -- cgit v1.2.3 From ba677ccb5a8ae0c889751fcdd40b0c9e818ad992 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 10:32:30 -0400 Subject: texture_cache: Implement guest flushing --- src/video_core/texture_cache/texture_cache.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 554b9a228..422bf3e58 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -61,6 +61,20 @@ public: } } + void FlushRegion(CacheAddr addr, std::size_t size) { + auto surfaces = GetSurfacesInRegion(addr, size); + if (surfaces.empty()) { + return; + } + std::sort(surfaces.begin(), surfaces.end(), + [](const TSurface& a, const TSurface& b) -> bool { + return a->GetModificationTick() < b->GetModificationTick(); + }); + for (const auto& surface : surfaces) { + FlushSurface(surface); + } + } + TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { const auto gpu_addr{config.tic.Address()}; -- cgit v1.2.3 From 4e2071b6d9b414fa0152deb5e9d55674d636afe4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 17:45:59 -0400 Subject: texture_cache: Correct premature texceptions Due to our current infrastructure, it is possible for a mipmap to be set as a render target before a texception of that mipmap's superset is set afterwards. This is problematic as we rely on texture views to set up texceptions and protect render targets for 3D texture rendering. One simple solution is to configure framebuffers after texture setup but this brings other problems. 
This solution, forces a reconfiguration of the framebuffers after such event happens. --- src/video_core/texture_cache/texture_cache.h | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 422bf3e58..96d108147 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -120,6 +120,10 @@ public: return {}; } + if (regs.color_mask[index].raw == 0) { + return {}; + } + auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) @@ -183,6 +187,12 @@ public: return ++ticks; } + bool ConsumeReconfigurationFlag() { + const bool result = force_reconfiguration; + force_reconfiguration = false; + return result; + } + protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { @@ -219,9 +229,10 @@ protected: rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } - void Unregister(TSurface surface) { - if (surface->IsProtected()) + void Unregister(TSurface surface, const bool force_unregister = false) { + if (surface->IsProtected() && !force_unregister) { return; + } const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = surface->GetCacheAddr(); const std::size_t size = surface->GetSizeInBytes(); @@ -365,8 +376,10 @@ private: std::min(src_params.height, dst_height), 1); ImageCopy(surface, new_surface, copy_params); } + force_reconfiguration = false; for (auto surface : overlaps) { - Unregister(surface); + force_reconfiguration |= surface->IsProtected(); + Unregister(surface, true); } Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; @@ -379,6 +392,7 @@ private: const auto cache_addr{ToCacheAddr(host_ptr)}; const std::size_t 
candidate_size = params.GetGuestSizeInBytes(); auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -403,7 +417,7 @@ private: return RebuildSurface(current_surface, params); } } - if (current_surface->GetSizeInBytes() <= candidate_size) { + if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); } @@ -530,6 +544,10 @@ private: u64 ticks{}; + // Sometimes Setup Textures can hit a surface that's on the render target, when this happens + // we force a reconfiguration of the frame buffer after setup. + bool force_reconfiguration; + // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly // large in size. -- cgit v1.2.3 From b347543e8341ae323ea232d47df2c144fe21c739 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 18:27:29 -0400 Subject: Reduce amount of size calculations. 
--- src/video_core/texture_cache/texture_cache.h | 40 ++++++++++++++-------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 96d108147..fbfd1ff0b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -120,10 +120,6 @@ public: return {}; } - if (regs.color_mask[index].raw == 0) { - return {}; - } - auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) @@ -165,7 +161,9 @@ public: const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) { - ImageBlit(GetFermiSurface(src_config), GetFermiSurface(dst_config), src_rect, dst_rect); + TSurface dst_surface = GetFermiSurface(dst_config); + ImageBlit(GetFermiSurface(src_config), dst_surface, src_rect, dst_rect); + dst_surface->MarkAsModified(true, Tick()); } TSurface TryFindFramebufferSurface(const u8* host_ptr) { @@ -270,10 +268,6 @@ private: RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { - // Untopological decision - if (untopological) { - return RecycleStrategy::Ignore; - } // 3D Textures decision if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -284,12 +278,16 @@ private: return RecycleStrategy::Flush; } } + // Untopological decision + if (untopological) { + return RecycleStrategy::Ignore; + } return RecycleStrategy::Ignore; } std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, - const u8* host_ptr, const bool preserve_contents, + const bool preserve_contents, const bool untopological) { for (auto surface : overlaps) { 
Unregister(surface); @@ -328,6 +326,7 @@ private: } Unregister(current_surface); Register(new_surface); + new_surface->MarkAsModified(current_surface->IsModified(), Tick()); return {new_surface, new_surface->GetMainView()}; } @@ -351,6 +350,7 @@ private: if (params.target == SurfaceTarget::Texture3D) { return {}; } + bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); for (auto surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); @@ -358,7 +358,7 @@ private: // We send this cases to recycle as they are more complex to handle return {}; } - const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); + const std::size_t candidate_size = surface->GetSizeInBytes(); auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { return {}; @@ -368,6 +368,7 @@ private: if (new_surface->GetMipmapSize(mipmap) != candidate_size) { return {}; } + modified |= surface->IsModified(); // Now we got all the data set up const u32 dst_width{params.GetMipWidth(mipmap)}; const u32 dst_height{params.GetMipHeight(mipmap)}; @@ -381,6 +382,7 @@ private: force_reconfiguration |= surface->IsProtected(); Unregister(surface, true); } + new_surface->MarkAsModified(modified, Tick()); Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; } @@ -399,8 +401,7 @@ private: for (auto surface : overlaps) { if (!surface->MatchesTopology(params)) { - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - true); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); } } @@ -418,27 +419,26 @@ private: } } if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } - std::optional view = current_surface->EmplaceView(params, gpu_addr); + std::optional view = + 
current_surface->EmplaceView(params, gpu_addr, candidate_size); if (view.has_value()) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } return {current_surface, *view}; } - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } else { std::optional> view = ReconstructSurface(overlaps, params, gpu_addr, host_ptr); if (view.has_value()) { return *view; } - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } } -- cgit v1.2.3 From 28d7c2f5a5089051410d37a03d5a4a42e4230842 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 01:10:16 -0300 Subject: texture_cache: Change internal cache from lists to vectors --- src/video_core/texture_cache/texture_cache.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fbfd1ff0b..1c2b63dae 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,11 +4,11 @@ #pragma once -#include #include #include #include #include +#include #include #include @@ -172,7 +172,7 @@ public: return nullptr; } const CacheAddr page = cache_addr >> registry_page_bits; - std::list& list = registry[page]; + std::vector& list = registry[page]; for (auto& s : list) { if (s->GetCacheAddr() == cache_addr) { return s; @@ -482,7 +482,8 @@ private: CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; const CacheAddr 
end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - registry[start].remove(surface); + auto& reg{registry[start]}; + reg.erase(std::find(reg.begin(), reg.end(), surface)); start++; } } @@ -496,7 +497,7 @@ private: const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; std::vector surfaces; while (start <= end) { - std::list& list = registry[start]; + std::vector& list = registry[start]; for (auto& s : list) { if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { s->MarkAsPicked(true); @@ -553,12 +554,12 @@ private: // large in size. static constexpr u64 registry_page_bits{20}; static constexpr u64 registry_page_size{1 << registry_page_bits}; - std::unordered_map> registry; + std::unordered_map> registry; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. - std::unordered_map> surface_reserve; + std::unordered_map> surface_reserve; std::array render_targets; DepthBufferInfo depth_buffer; -- cgit v1.2.3 From 345e73f2feb0701e3c3099d002a1c21fb524eae4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 04:17:48 -0300 Subject: video_core: Use un-shifted block sizes to avoid integer divisions Instead of storing all block width, height and depths in their shifted form: block_width = 1U << block_shift; Store them like they are provided by the emulated hardware (their block_shift form). This way we can avoid doing the costly Common::AlignUp operation to align texture sizes and drop CPU integer divisions with bitwise logic (defined in Common::AlignBits). 
--- src/video_core/texture_cache/texture_cache.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1c2b63dae..f35d0c88f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -81,6 +81,9 @@ public: if (!gpu_addr) { return {}; } + if (gpu_addr == 0x1b7ec0000) { + // __debugbreak(); + } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; return GetSurface(gpu_addr, params, true).second; } -- cgit v1.2.3 From a4a58be2d46e95df4cead2916b6efbd658a0deaa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 17:59:18 -0400 Subject: texture_cache: Implement L1_Inner_cache --- src/video_core/texture_cache/texture_cache.h | 43 +++++++++++++++++++--------- 1 file changed, 30 insertions(+), 13 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f35d0c88f..ad0fbd7ce 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -395,6 +395,26 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + + if (l1_cache.count(cache_addr) > 0) { + TSurface current_surface = l1_cache[cache_addr]; + if (!current_surface->MatchesTopology(params)) { + std::vector overlaps{current_surface}; + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + } + MatchStructureResult s_result = current_surface->MatchesStructure(params); + if (s_result != MatchStructureResult::None && + current_surface->GetGpuAddr() == gpu_addr && + (params.target != SurfaceTarget::Texture3D || + current_surface->MatchTarget(params.target))) { + if (s_result == MatchStructureResult::FullMatch) { + return 
ManageStructuralMatch(current_surface, params); + } else { + return RebuildSurface(current_surface, params); + } + } + } + const std::size_t candidate_size = params.GetGuestSizeInBytes(); auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; @@ -410,17 +430,6 @@ private: if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; - MatchStructureResult s_result = current_surface->MatchesStructure(params); - if (s_result != MatchStructureResult::None && - current_surface->GetGpuAddr() == gpu_addr && - (params.target != SurfaceTarget::Texture3D || - current_surface->MatchTarget(params.target))) { - if (s_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params); - } else { - return RebuildSurface(current_surface, params); - } - } if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } @@ -473,8 +482,10 @@ private: } void RegisterInnerCache(TSurface& surface) { - CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr cache_addr = surface->GetCacheAddr(); + CacheAddr start = cache_addr >> registry_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; + l1_cache[cache_addr] = surface; while (start <= end) { registry[start].push_back(surface); start++; @@ -482,8 +493,10 @@ private: } void UnregisterInnerCache(TSurface& surface) { - CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr cache_addr = surface->GetCacheAddr(); + CacheAddr start = cache_addr >> registry_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; + l1_cache.erase(cache_addr); while (start <= end) { auto& reg{registry[start]}; reg.erase(std::find(reg.begin(), reg.end(), surface)); @@ -559,6 +572,10 @@ private: static constexpr u64 registry_page_size{1 << registry_page_bits}; std::unordered_map> registry; + // The L1 Cache is 
used for fast texture lookup before checking the overlaps + // This avoids calculating size and other stuffs. + std::unordered_map l1_cache; + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. -- cgit v1.2.3 From 5192521dc3f752c385de356158706899f523e498 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 22:26:46 -0400 Subject: texture_cache: Implement GPU Dirty Flags --- src/video_core/texture_cache/texture_cache.h | 37 +++++++++++++++++----------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ad0fbd7ce..8aa0d6515 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -81,17 +81,22 @@ public: if (!gpu_addr) { return {}; } - if (gpu_addr == 0x1b7ec0000) { - // __debugbreak(); - } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; return GetSurface(gpu_addr, params, true).second; } TView GetDepthBufferSurface(bool preserve_contents) { - const auto& regs{system.GPU().Maxwell3D().regs}; + auto& maxwell3d = system.GPU().Maxwell3D(); + + if (!maxwell3d.dirty_flags.zeta_buffer) { + return depth_buffer.view; + } + maxwell3d.dirty_flags.zeta_buffer = false; + + const auto& regs{maxwell3d.regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { + SetEmptyDepthBuffer(); return {}; } const auto depth_params{SurfaceParams::CreateForDepthBuffer( @@ -101,6 +106,8 @@ public: auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); if (depth_buffer.target) depth_buffer.target->MarkAsProtected(false); + depth_buffer.target = surface_view.first; + depth_buffer.view = 
surface_view.second; if (depth_buffer.target) depth_buffer.target->MarkAsProtected(true); return surface_view.second; @@ -108,8 +115,13 @@ public: TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); + auto& maxwell3d = system.GPU().Maxwell3D(); + if (!maxwell3d.dirty_flags.color_buffer[index]) { + return render_targets[index].view; + } + maxwell3d.dirty_flags.color_buffer.reset(index); - const auto& regs{system.GPU().Maxwell3D().regs}; + const auto& regs{maxwell3d.regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { SetEmptyColorBuffer(index); @@ -128,6 +140,7 @@ public: if (render_targets[index].target) render_targets[index].target->MarkAsProtected(false); render_targets[index].target = surface_view.first; + render_targets[index].view = surface_view.second; if (render_targets[index].target) render_targets[index].target->MarkAsProtected(true); return surface_view.second; @@ -154,7 +167,6 @@ public: void SetEmptyColorBuffer(std::size_t index) { if (render_targets[index].target != nullptr) { render_targets[index].target->MarkAsProtected(false); - std::memset(&render_targets[index].config, sizeof(RenderTargetConfig), 0); render_targets[index].target = nullptr; render_targets[index].view = nullptr; } @@ -545,13 +557,7 @@ private: return {}; } - struct RenderInfo { - RenderTargetConfig config; - TSurface target; - TView view; - }; - - struct DepthBufferInfo { + struct FramebufferTargetInfo { TSurface target; TView view; }; @@ -580,8 +586,9 @@ private: /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. 
std::unordered_map> surface_reserve; - std::array render_targets; - DepthBufferInfo depth_buffer; + std::array + render_targets; + FramebufferTargetInfo depth_buffer; std::vector staging_buffer; }; -- cgit v1.2.3 From 1bbc9debfbcbd960874e2f877604506d174f613c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 23:42:08 -0400 Subject: Remove Framebuffer reconfiguration and restrict rendertarget protection --- src/video_core/texture_cache/texture_cache.h | 30 +++++++++------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8aa0d6515..4ac5668c8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -105,11 +105,11 @@ public: regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); if (depth_buffer.target) - depth_buffer.target->MarkAsProtected(false); + depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target = surface_view.first; depth_buffer.view = surface_view.second; if (depth_buffer.target) - depth_buffer.target->MarkAsProtected(true); + depth_buffer.target->MarkAsRenderTarget(true); return surface_view.second; } @@ -138,11 +138,11 @@ public: auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) - render_targets[index].target->MarkAsProtected(false); + render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target = surface_view.first; render_targets[index].view = surface_view.second; if (render_targets[index].target) - render_targets[index].target->MarkAsProtected(true); + render_targets[index].target->MarkAsRenderTarget(true); return surface_view.second; } @@ -158,7 +158,7 @@ public: void 
SetEmptyDepthBuffer() { if (depth_buffer.target != nullptr) { - depth_buffer.target->MarkAsProtected(false); + depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target = nullptr; depth_buffer.view = nullptr; } @@ -166,7 +166,7 @@ public: void SetEmptyColorBuffer(std::size_t index) { if (render_targets[index].target != nullptr) { - render_targets[index].target->MarkAsProtected(false); + render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target = nullptr; render_targets[index].view = nullptr; } @@ -200,12 +200,6 @@ public: return ++ticks; } - bool ConsumeReconfigurationFlag() { - const bool result = force_reconfiguration; - force_reconfiguration = false; - return result; - } - protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { @@ -242,8 +236,8 @@ protected: rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } - void Unregister(TSurface surface, const bool force_unregister = false) { - if (surface->IsProtected() && !force_unregister) { + void Unregister(TSurface surface) { + if (surface->IsProtected()) { return; } const GPUVAddr gpu_addr = surface->GetGpuAddr(); @@ -392,10 +386,8 @@ private: std::min(src_params.height, dst_height), 1); ImageCopy(surface, new_surface, copy_params); } - force_reconfiguration = false; for (auto surface : overlaps) { - force_reconfiguration |= surface->IsProtected(); - Unregister(surface, true); + Unregister(surface); } new_surface->MarkAsModified(modified, Tick()); Register(new_surface); @@ -567,10 +559,6 @@ private: u64 ticks{}; - // Sometimes Setup Textures can hit a surface that's on the render target, when this happens - // we force a reconfiguration of the frame buffer after setup. - bool force_reconfiguration; - // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly // large in size. 
-- cgit v1.2.3 From 07cc7e0c12143a84744abb8dc03eb46eb615b308 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 23:50:01 -0400 Subject: texture_cache: Add ASync Protections --- src/video_core/texture_cache/texture_cache.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4ac5668c8..1b8ada910 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include @@ -56,12 +57,16 @@ public: } void InvalidateRegion(CacheAddr addr, std::size_t size) { + std::lock_guard lock{mutex}; + for (const auto& surface : GetSurfacesInRegion(addr, size)) { Unregister(surface); } } void FlushRegion(CacheAddr addr, std::size_t size) { + std::lock_guard lock{mutex}; + auto surfaces = GetSurfacesInRegion(addr, size); if (surfaces.empty()) { return; @@ -220,6 +225,8 @@ protected: const Common::Rectangle& dst_rect) = 0; void Register(TSurface surface) { + std::lock_guard lock{mutex}; + const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); @@ -237,6 +244,8 @@ protected: } void Unregister(TSurface surface) { + std::lock_guard lock{mutex}; + if (surface->IsProtected()) { return; } @@ -579,6 +588,7 @@ private: FramebufferTargetInfo depth_buffer; std::vector staging_buffer; + std::recursive_mutex mutex; }; } // namespace VideoCommon -- cgit v1.2.3 From d65a4af89582f272efbbfd47d1ee78e616553312 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 11 May 2019 01:21:02 -0400 Subject: texture_cache return invalid buffer on deactivated color_mask --- src/video_core/texture_cache/texture_cache.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 
'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1b8ada910..7058399e2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -133,6 +133,11 @@ public: return {}; } + if (regs.color_mask[index].raw != 0) { + SetEmptyColorBuffer(index); + return {}; + } + const auto& config{regs.rt[index]}; const auto gpu_addr{config.Address()}; if (!gpu_addr) { -- cgit v1.2.3 From 9098905dd13bb68f2fe49a9590688b76cc999fdd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 11 May 2019 03:15:49 -0300 Subject: gl_framebuffer_cache: Use a hashed struct to cache framebuffers --- src/video_core/texture_cache/texture_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7058399e2..419c0de5e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -133,7 +133,7 @@ public: return {}; } - if (regs.color_mask[index].raw != 0) { + if (regs.color_mask[index].raw == 0) { SetEmptyColorBuffer(index); return {}; } -- cgit v1.2.3 From a79831d9d02f7c42d82ea36210cac7952a3ef16e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 13 May 2019 19:14:02 -0400 Subject: texture_cache: Implement Guard mechanism --- src/video_core/texture_cache/texture_cache.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 419c0de5e..2ad6210dd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -64,6 +64,10 @@ public: } } + void Guard(bool new_guard) { + guard_cache = new_guard; + } + 
void FlushRegion(CacheAddr addr, std::size_t size) { std::lock_guard lock{mutex}; @@ -251,7 +255,7 @@ protected: void Unregister(TSurface surface) { std::lock_guard lock{mutex}; - if (surface->IsProtected()) { + if (guard_cache && surface->IsProtected()) { return; } const GPUVAddr gpu_addr = surface->GetGpuAddr(); @@ -573,6 +577,9 @@ private: u64 ticks{}; + // Guards the cache for protection conflicts. + bool guard_cache{}; + // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly // large in size. -- cgit v1.2.3 From 4530511ee4dfc92ddbfed7f91978f332be517c90 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 13 May 2019 21:35:32 -0400 Subject: texture_cache: Try to Reconstruct Surface on bigger than overlap. This fixes clouds in SMO Cap Kingdom and lens on Cloud Kingdom. Also moved accurate_gpu setting check to Pick Strategy --- src/video_core/texture_cache/texture_cache.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2ad6210dd..38b56475f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -295,6 +295,9 @@ private: RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { + if (Settings::values.use_accurate_gpu_emulation) { + return RecycleStrategy::Flush; + } // 3D Textures decision if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -319,10 +322,7 @@ private: for (auto surface : overlaps) { Unregister(surface); } - RecycleStrategy strategy = !Settings::values.use_accurate_gpu_emulation - ? 
? PickStrategy(overlaps, params, gpu_addr, untopological) - : RecycleStrategy::Flush; - switch (strategy) { + switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { case RecycleStrategy::Ignore: { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -453,6 +453,13 @@ private: if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { + if (current_surface->GetGpuAddr() == gpu_addr) { + std::optional> view = + ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + if (view.has_value()) { + return *view; + } + } return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } std::optional view = -- cgit v1.2.3 From 6162cb922e67c6c529fb17a91da726fdf3444a50 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 13 May 2019 22:59:18 -0400 Subject: texture_cache: Document the most important methods. --- src/video_core/texture_cache/texture_cache.h | 95 +++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 8 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 38b56475f..04e9528b8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -64,6 +64,10 @@ public: } } + /** + * `Guard` guarantees that rendertargets don't unregister themselves if they + * collide. Protection is currently only done on 3D slices. + **/ void Guard(bool new_guard) { guard_cache = new_guard; } @@ -293,6 +297,14 @@ private: BufferCopy = 3, }; + /** + * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. + * @param overlaps, the overlapping surfaces registered in the cache. + * @param params, the parameters on the new surface. + * @param gpu_addr, the starting address of the new surface.
+ * @param untopological, tells the recycler that the texture has no way to match the overlaps + * due to topological reasons. + **/ RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { if (Settings::values.use_accurate_gpu_emulation) { @@ -315,6 +327,18 @@ private: return RecycleStrategy::Ignore; } + /** + * `RecycleSurface` is a method we use to decide what to do with textures we can't resolve in + *the cache. It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the + *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the + *new surface from that data. + * @param overlaps, the overlapping surfaces registered in the cache. + * @param params, the parameters on the new surface. + * @param gpu_addr, the starting address of the new surface. + * @param preserve_contents, tells if the new surface should be loaded from memory or left blank + * @param untopological, tells the recycler that the texture has no way to match the overlaps + * due to topological reasons. + **/ std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, @@ -343,6 +367,12 @@ private: } } + /** + * `RebuildSurface` this method takes a single surface and recreates it into another that + * may differ in format, target or width alignment. + * @param current_surface, the registered surface in the cache which we want to convert. + * @param params, the new surface params which we'll use to recreate the surface.
+ **/ std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); @@ -357,6 +387,14 @@ private: return {new_surface, new_surface->GetMainView()}; } + /** + * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's + * params if it's an exact match, we return the main view of the registered surface. If its + * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats + * match but the targets don't, we create an overview View of the registered surface. + * @param current_surface, the registered surface in the cache which we want to convert. + * @param params, the new surface params which we want to check. + **/ std::pair ManageStructuralMatch(TSurface current_surface, const SurfaceParams& params) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); @@ -370,10 +408,18 @@ private: return {current_surface, current_surface->EmplaceOverview(params)}; } - std::optional> ReconstructSurface(std::vector& overlaps, - const SurfaceParams& params, - const GPUVAddr gpu_addr, - const u8* host_ptr) { + /** + * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface + * matches the candidate in some way, we have no guarantees here. We try to see if the overlaps + * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface + * for them, else we return nothing. + * @param overlaps, the overlapping surfaces registered in the cache. + * @param params, the parameters on the new surface. + * @param gpu_addr, the starting address of the new surface.
+ **/ + std::optional> TryReconstructSurface(std::vector& overlaps, + const SurfaceParams& params, + const GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { return {}; } @@ -412,12 +458,30 @@ private: return {{new_surface, new_surface->GetMainView()}}; } + /** + * `GetSurface` gets the starting address and parameters of a candidate surface and tries + * to find a matching surface within the cache. This is done in 3 big steps. The first is to + * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. + * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from + * memory else we move to step 3. Step 3 consists of figuring out the relationship between the + * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many + * overlaps. If there are many, we just try to reconstruct a new surface out of them based on the + * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to + * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface + * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface. + * @param gpu_addr, the starting address of the candidate surface. + * @param params, the parameters on the candidate surface. + * @param preserve_contents, tells if the new surface should be loaded from memory or left blank. + **/ std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + // Step 1 + // Check Level 1 Cache for a fast structural match. If candidate surface + // matches at certain level we are pretty much done.
if (l1_cache.count(cache_addr) > 0) { TSurface current_surface = l1_cache[cache_addr]; if (!current_surface->MatchesTopology(params)) { @@ -437,31 +501,43 @@ private: } } + // Step 2 + // Obtain all possible overlaps in the memory region const std::size_t candidate_size = params.GetGuestSizeInBytes(); auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + // If none are found, we are done. We just load the surface and create it. if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } + // Step 3 + // Now we need to figure the relationship between the texture and its overlaps + // we do a topological test to ensure we can find some relationship. If it fails + // immediately recycle the texture for (auto surface : overlaps) { if (!surface->MatchesTopology(params)) { return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); } } + // Split cases between 1 overlap or many. if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; + // First check if the surface is within the overlap. If not, it means + // one of two things: either the candidate surface is a supertexture of the overlap + // or they don't match in any known way.
if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { if (current_surface->GetGpuAddr() == gpu_addr) { std::optional> view = - ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + TryReconstructSurface(overlaps, params, gpu_addr); if (view.has_value()) { return *view; } } return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } + // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = current_surface->EmplaceView(params, gpu_addr, candidate_size); if (view.has_value()) { @@ -472,15 +548,18 @@ private: } return {current_surface, *view}; } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } else { + // If there are many overlaps, odds are they are subtextures of the candidate + // surface. We try to construct a new surface based on the candidate parameters, + // using the overlaps. If a single overlap fails, this will fail. std::optional> view = - ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + TryReconstructSurface(overlaps, params, gpu_addr); if (view.has_value()) { return *view; } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } + // We failed all the tests, recycle the overlaps into a new texture. + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, -- cgit v1.2.3 From d267948a73d2364949660a24d07833ea05c9fcc8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 14 May 2019 00:55:32 -0400 Subject: texture_cache: loose TryReconstructSurface when accurate GPU is not on. Also corrects some asserts. 
--- src/video_core/texture_cache/texture_cache.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 04e9528b8..85c9160e0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -425,6 +425,7 @@ private: } bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); + u32 passed_tests = 0; for (auto surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); if (src_params.is_layered || src_params.num_levels > 1) { @@ -434,12 +435,12 @@ private: const std::size_t candidate_size = surface->GetSizeInBytes(); auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { - return {}; + continue; } const u32 layer{mipmap_layer->first}; const u32 mipmap{mipmap_layer->second}; if (new_surface->GetMipmapSize(mipmap) != candidate_size) { - return {}; + continue; } modified |= surface->IsModified(); // Now we got all the data set up @@ -448,8 +449,15 @@ private: const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, std::min(src_params.width, dst_width), std::min(src_params.height, dst_height), 1); + passed_tests++; ImageCopy(surface, new_surface, copy_params); } + if (passed_tests == 0) { + return {}; + // In Accurate GPU all test should pass, else we recycle + } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { + return {}; + } for (auto surface : overlaps) { Unregister(surface); } @@ -548,6 +556,14 @@ private: } return {current_surface, *view}; } + // The next case is unsafe, so if we r in accurate GPU, just skip it + if (Settings::values.use_accurate_gpu_emulation) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + } + // This is the case the texture is a part of the 
parent. + if (current_surface->MatchesSubTexture(params, gpu_addr)) { + return RebuildSurface(current_surface, params); + } } else { // If there are many overlaps, odds are they are subtextures of the candidate // surface. We try to construct a new surface based on the candidate parameters, -- cgit v1.2.3 From 175aa343ff1c9f931b266caf2d19b8df943dab0d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 18 May 2019 04:57:49 -0400 Subject: texture_cache: Fermi2D reform and implement View Mirage This also does some fixes on compressed textures reinterpret and on the Fermi2D engine in general. --- src/video_core/texture_cache/texture_cache.h | 40 +++++++++++++++++----------- 1 file changed, 25 insertions(+), 15 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 85c9160e0..593ceeaf6 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -141,11 +141,6 @@ public: return {}; } - if (regs.color_mask[index].raw == 0) { - SetEmptyColorBuffer(index); - return {}; - } - const auto& config{regs.rt[index]}; const auto gpu_addr{config.Address()}; if (!gpu_addr) { @@ -192,11 +187,11 @@ public: void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) { - TSurface dst_surface = GetFermiSurface(dst_config); - ImageBlit(GetFermiSurface(src_config), dst_surface, src_rect, dst_rect); - dst_surface->MarkAsModified(true, Tick()); + const Tegra::Engines::Fermi2D::Config& copy_config) { + std::pair dst_surface = GetFermiSurface(dst_config); + std::pair src_surface = GetFermiSurface(src_config); + ImageBlit(src_surface.second, dst_surface.second, copy_config); + dst_surface.first->MarkAsModified(true, Tick()); } TSurface TryFindFramebufferSurface(const 
u8* host_ptr) { @@ -234,8 +229,8 @@ protected: virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, const CopyParams& copy_params) = 0; - virtual void ImageBlit(TSurface src, TSurface dst, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) = 0; + virtual void ImageBlit(TView src_view, TView dst_view, + const Tegra::Engines::Fermi2D::Config& copy_config) = 0; void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -282,10 +277,11 @@ protected: return new_surface; } - TSurface GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + std::pair GetFermiSurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config) { SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); const GPUVAddr gpu_addr = config.Address(); - return GetSurface(gpu_addr, params, true).first; + return GetSurface(gpu_addr, params, true); } Core::System& system; @@ -551,7 +547,21 @@ private: if (view.has_value()) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { - LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); + // On a mirage view, we need to recreate the surface under this new view + // and then obtain a view again. 
+ SurfaceParams new_params = current_surface->GetSurfaceParams(); + const u32 wh = SurfaceParams::ConvertWidth( + new_params.width, new_params.pixel_format, params.pixel_format); + const u32 hh = SurfaceParams::ConvertHeight( + new_params.height, new_params.pixel_format, params.pixel_format); + new_params.width = wh; + new_params.height = hh; + new_params.pixel_format = params.pixel_format; + std::pair pair = RebuildSurface(current_surface, new_params); + std::optional mirage_view = + pair.first->EmplaceView(params, gpu_addr, candidate_size); + if (mirage_view) + return {pair.first, *mirage_view}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } return {current_surface, *view}; -- cgit v1.2.3 From e60ed2bb3e7e4ce63cc263019cce72a080c536ed Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 08:36:00 -0400 Subject: texture_cache: return null surface on invalid address --- src/video_core/texture_cache/texture_cache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 593ceeaf6..24c87127d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -483,6 +483,18 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + // Step 0: guarantee a valid surface + if (!cache_addr) { + // Return a null surface if it's invalid + SurfaceParams new_params = params; + new_params.width = 1; + new_params.height = 1; + new_params.depth = 1; + new_params.block_height = 0; + new_params.block_depth = 0; + return InitializeSurface(gpu_addr, new_params, false); + } + // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. 
-- cgit v1.2.3 From bdf9faab331cd79ca5c5e51c2369fc801e8cecea Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 11:24:20 -0400 Subject: texture_cache: Handle uncontinuous surfaces. --- src/video_core/texture_cache/texture_cache.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 24c87127d..ab4e094ea 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -220,6 +220,7 @@ protected: SetEmptyColorBuffer(i); } SetEmptyDepthBuffer(); + staging_cache.SetSize(2); } ~TextureCache() = default; @@ -244,6 +245,8 @@ protected: gpu_addr); return; } + bool continuouty = memory_manager->IsBlockContinuous(gpu_addr, size); + surface->MarkAsContinuous(continuouty); surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); RegisterInnerCache(surface); @@ -611,9 +614,9 @@ private: } void LoadSurface(const TSurface& surface) { - staging_buffer.resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(*memory_manager, staging_buffer); - surface->UploadTexture(staging_buffer); + staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); + surface->LoadBuffer(*memory_manager, staging_cache); + surface->UploadTexture(staging_cache.GetBuffer(0)); surface->MarkAsModified(false, Tick()); } @@ -621,9 +624,9 @@ private: if (!surface->IsModified()) { return; } - staging_buffer.resize(surface->GetHostSizeInBytes()); - surface->DownloadTexture(staging_buffer); - surface->FlushBuffer(*memory_manager, staging_buffer); + staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); + surface->DownloadTexture(staging_cache.GetBuffer(0)); + surface->FlushBuffer(*memory_manager, staging_cache); surface->MarkAsModified(false, Tick()); } @@ -723,7 +726,7 @@ private: render_targets; FramebufferTargetInfo depth_buffer; - 
std::vector staging_buffer; + StagingCache staging_cache; std::recursive_mutex mutex; }; -- cgit v1.2.3 From 0966665fc225eee29b3ed87baefd74f79c19d307 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 22 May 2019 12:30:53 -0400 Subject: texture_cache: Only load on recycle with accurate GPU. Testing so far has proven this to be quite safe as texture memory read added a 2-5ms load to the current cache. --- src/video_core/texture_cache/texture_cache.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ab4e094ea..685bd28f4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -342,12 +342,13 @@ private: const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, const bool untopological) { + const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; for (auto surface : overlaps) { Unregister(surface); } switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { case RecycleStrategy::Ignore: { - return InitializeSurface(gpu_addr, params, preserve_contents); + return InitializeSurface(gpu_addr, params, do_load); } case RecycleStrategy::Flush: { std::sort(overlaps.begin(), overlaps.end(), @@ -361,7 +362,7 @@ private: } default: { UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); - return InitializeSurface(gpu_addr, params, preserve_contents); + return InitializeSurface(gpu_addr, params, do_load); } } } -- cgit v1.2.3 From 92513541529e90f4f79a1f2c3f8ccf5a199e4c20 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 24 May 2019 11:59:23 -0400 Subject: texture_cache: Correct copying between compressed and uncompressed formats --- src/video_core/texture_cache/texture_cache.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 
'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 685bd28f4..d2093e581 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -444,11 +444,9 @@ private: } modified |= surface->IsModified(); // Now we got all the data set up - const u32 dst_width{params.GetMipWidth(mipmap)}; - const u32 dst_height{params.GetMipHeight(mipmap)}; - const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, - std::min(src_params.width, dst_width), - std::min(src_params.height, dst_height), 1); + const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); + const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); + const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); passed_tests++; ImageCopy(surface, new_surface, copy_params); } -- cgit v1.2.3 From 228f516bb4426a41a4d1c1756751557f7a0eecda Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 24 May 2019 15:34:31 -0400 Subject: texture_cache uncompress-compress is untopological. This makes conflicts between non compress and compress textures to be auto recycled. It also limits the amount of mipmaps a texture can have if it goes above it's limit. --- src/video_core/texture_cache/texture_cache.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d2093e581..69ef7a2bd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -305,7 +305,7 @@ private: * due to topological reasons. 
**/ RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, const bool untopological) { + const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { if (Settings::values.use_accurate_gpu_emulation) { return RecycleStrategy::Flush; } @@ -320,8 +320,8 @@ private: } } // Untopological decision - if (untopological) { - return RecycleStrategy::Ignore; + if (untopological == MatchTopologyResult::CompressUnmatch) { + return RecycleStrategy::Flush; } return RecycleStrategy::Ignore; } @@ -341,7 +341,7 @@ private: std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, - const bool untopological) { + const MatchTopologyResult untopological) { const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; for (auto surface : overlaps) { Unregister(surface); @@ -502,9 +502,10 @@ private: // matches at certain level we are pretty much done. if (l1_cache.count(cache_addr) > 0) { TSurface current_surface = l1_cache[cache_addr]; - if (!current_surface->MatchesTopology(params)) { + auto topological_result = current_surface->MatchesTopology(params); + if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && @@ -534,8 +535,9 @@ private: // we do a topological test to ensure we can find some relationship. 
If it fails // inmediatly recycle the texture for (auto surface : overlaps) { - if (!surface->MatchesTopology(params)) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + auto topological_result = surface->MatchesTopology(params); + if (topological_result != MatchTopologyResult::FullMatch) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } } @@ -553,7 +555,7 @@ private: return *view; } } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = @@ -576,13 +578,13 @@ private: pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } return {current_surface, *view}; } // The next case is unsafe, so if we r in accurate GPU, just skip it if (Settings::values.use_accurate_gpu_emulation) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { @@ -599,7 +601,7 @@ private: } } // We failed all the tests, recycle the overlaps into a new texture. 
- return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, -- cgit v1.2.3 From 60bf761afbb125abd324e4b798d18a1611b5777b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jun 2019 19:12:00 -0400 Subject: texture_cache: Implement Buffer Copy and detect Turing GPUs Image Copies --- src/video_core/texture_cache/texture_cache.h | 40 +++++++++++++++++++++------- 1 file changed, 31 insertions(+), 9 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 69ef7a2bd..e0d0e1f70 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -214,6 +214,13 @@ public: } protected: + // This structure is used for communicating with the backend, on which behaviors + // it supports and what not, to avoid assuming certain things about hardware. + // The backend is RESPONSIBLE for filling this settings on creation. + struct Support { + bool depth_color_image_copies; + } support_info; + TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { @@ -233,6 +240,10 @@ protected: virtual void ImageBlit(TView src_view, TView dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) = 0; + // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture + // and reading it from a sepparate buffer. 
+ virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0; + void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -377,9 +388,14 @@ private: const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); - std::vector bricks = current_surface->BreakDown(params); - for (auto& brick : bricks) { - ImageCopy(current_surface, new_surface, brick); + const auto& cr_params = current_surface->GetSurfaceParams(); + if (!support_info.depth_color_image_copies && cr_params.type != params.type) { + BufferCopy(current_surface, new_surface); + } else { + std::vector bricks = current_surface->BreakDown(params); + for (auto& brick : bricks) { + ImageCopy(current_surface, new_surface, brick); + } } Unregister(current_surface); Register(new_surface); @@ -505,7 +521,8 @@ private: auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + topological_result); } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && @@ -537,7 +554,8 @@ private: for (auto surface : overlaps) { auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + topological_result); } } @@ -555,7 +573,8 @@ private: return *view; } } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } // Now we 
check if the candidate is a mipmap/layer of the overlap std::optional view = @@ -578,13 +597,15 @@ private: pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } return {current_surface, *view}; } // The next case is unsafe, so if we r in accurate GPU, just skip it if (Settings::values.use_accurate_gpu_emulation) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { @@ -601,7 +622,8 @@ private: } } // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, -- cgit v1.2.3 From 3809041c24a6ebea009923c14fb36aa1031bf188 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jun 2019 22:15:55 -0400 Subject: texture_cache: Optimize GetSurface and use references on functions that don't change a surface. 
--- src/video_core/texture_cache/texture_cache.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index e0d0e1f70..951168357 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -234,15 +234,15 @@ protected: virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; - virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, + virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, const CopyParams& copy_params) = 0; - virtual void ImageBlit(TView src_view, TView dst_view, + virtual void ImageBlit(TView& src_view, TView& dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) = 0; // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture // and reading it from a sepparate buffer. - virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0; + virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -516,8 +516,9 @@ private: // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. 
- if (l1_cache.count(cache_addr) > 0) { - TSurface current_surface = l1_cache[cache_addr]; + auto iter = l1_cache.find(cache_addr); + if (iter != l1_cache.end()) { + TSurface& current_surface = iter->second; auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; @@ -526,7 +527,6 @@ private: } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && - current_surface->GetGpuAddr() == gpu_addr && (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { if (s_result == MatchStructureResult::FullMatch) { -- cgit v1.2.3 From 6f69f06873f666174d3c0306055bc5f097d64afc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 4 Jun 2019 12:12:40 -0400 Subject: texture_cache: Don't Image Copy if component types differ --- src/video_core/texture_cache/texture_cache.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 951168357..d2c27bcef 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -389,7 +389,8 @@ private: const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); const auto& cr_params = current_surface->GetSurfaceParams(); - if (!support_info.depth_color_image_copies && cr_params.type != params.type) { + if (cr_params.type != params.type && (!support_info.depth_color_image_copies || + cr_params.component_type != params.component_type)) { BufferCopy(current_surface, new_surface); } else { std::vector bricks = current_surface->BreakDown(params); -- cgit v1.2.3 From 561ce29c98bf822941061023e1f71a62175318ae Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 10 Jun 2019 
10:39:59 -0400 Subject: texture_cache: correct mutex locks --- src/video_core/texture_cache/texture_cache.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d2c27bcef..503bd2b43 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -90,6 +90,7 @@ public: TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { + std::lock_guard lock{mutex}; const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { return {}; @@ -99,6 +100,7 @@ public: } TView GetDepthBufferSurface(bool preserve_contents) { + std::lock_guard lock{mutex}; auto& maxwell3d = system.GPU().Maxwell3D(); if (!maxwell3d.dirty_flags.zeta_buffer) { @@ -127,6 +129,7 @@ public: } TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { + std::lock_guard lock{mutex}; ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); auto& maxwell3d = system.GPU().Maxwell3D(); if (!maxwell3d.dirty_flags.color_buffer[index]) { @@ -188,6 +191,7 @@ public: void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, const Tegra::Engines::Fermi2D::Config& copy_config) { + std::lock_guard lock{mutex}; std::pair dst_surface = GetFermiSurface(dst_config); std::pair src_surface = GetFermiSurface(src_config); ImageBlit(src_surface.second, dst_surface.second, copy_config); @@ -245,8 +249,6 @@ protected: virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void Register(TSurface surface) { - std::lock_guard lock{mutex}; - const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); @@ -266,8 +268,6 @@ 
protected: } void Unregister(TSurface surface) { - std::lock_guard lock{mutex}; - if (guard_cache && surface->IsProtected()) { return; } -- cgit v1.2.3 From b01f9c8a7090fa056ca564593eabcebab946ef41 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 11 Jun 2019 07:20:27 -0400 Subject: texture_cache: eliminate accelerated depth->color/color->depth copies due to driver instability. --- src/video_core/texture_cache/texture_cache.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 503bd2b43..c95b1b976 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -218,12 +218,6 @@ public: } protected: - // This structure is used for communicating with the backend, on which behaviors - // it supports and what not, to avoid assuming certain things about hardware. - // The backend is RESPONSIBLE for filling this settings on creation. 
- struct Support { - bool depth_color_image_copies; - } support_info; TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { @@ -389,8 +383,7 @@ private: const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); const auto& cr_params = current_surface->GetSurfaceParams(); - if (cr_params.type != params.type && (!support_info.depth_color_image_copies || - cr_params.component_type != params.component_type)) { + if (cr_params.type != params.type || (cr_params.component_type != params.component_type)) { BufferCopy(current_surface, new_surface); } else { std::vector bricks = current_surface->BreakDown(params); -- cgit v1.2.3 From 2d83553ea7ab2629e7e1a83cc3345c0115d69453 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 09:46:36 -0400 Subject: texture_cache: Implement siblings texture formats. --- src/video_core/texture_cache/texture_cache.h | 39 +++++++++++++++++++--------- 1 file changed, 27 insertions(+), 12 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c95b1b976..022416706 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -43,6 +43,8 @@ class RasterizerInterface; namespace VideoCommon { +using VideoCore::Surface::PixelFormat; + using VideoCore::Surface::SurfaceTarget; using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; @@ -96,7 +98,7 @@ public: return {}; } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; - return GetSurface(gpu_addr, params, true).second; + return GetSurface(gpu_addr, params, true, false).second; } TView GetDepthBufferSurface(bool preserve_contents) { @@ -118,7 +120,7 @@ public: system, regs.zeta_width, regs.zeta_height, regs.zeta.format, 
regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); + auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); if (depth_buffer.target) depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target = surface_view.first; @@ -152,7 +154,7 @@ public: } auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), - preserve_contents); + preserve_contents, true); if (render_targets[index].target) render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target = surface_view.first; @@ -226,6 +228,11 @@ protected: } SetEmptyDepthBuffer(); staging_cache.SetSize(2); + siblings_table[PixelFormat::Z16] = PixelFormat::R16F; + siblings_table[PixelFormat::Z32F] = PixelFormat::R32F; + siblings_table[PixelFormat::Z32FS8] = PixelFormat::RG32F; + siblings_table[PixelFormat::R16F] = PixelFormat::Z16; + siblings_table[PixelFormat::R32F] = PixelFormat::Z32F; } ~TextureCache() = default; @@ -289,7 +296,7 @@ protected: const Tegra::Engines::Fermi2D::Regs::Surface& config) { SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); const GPUVAddr gpu_addr = config.Address(); - return GetSurface(gpu_addr, params, true); + return GetSurface(gpu_addr, params, true, false); } Core::System& system; @@ -406,16 +413,22 @@ private: * @param params, the new surface params which we want to check. 
**/ std::pair ManageStructuralMatch(TSurface current_surface, - const SurfaceParams& params) { + const SurfaceParams& params, bool is_render) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + const bool matches_target = current_surface->MatchTarget(params.target); + auto match_check = ([&]() -> std::pair { + if (matches_target) { + return {current_surface, current_surface->GetMainView()}; + } + return {current_surface, current_surface->EmplaceOverview(params)}; + }); if (is_mirage) { + if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { + return match_check(); + } return RebuildSurface(current_surface, params); } - const bool matches_target = current_surface->MatchTarget(params.target); - if (matches_target) { - return {current_surface, current_surface->GetMainView()}; - } - return {current_surface, current_surface->EmplaceOverview(params)}; + return match_check(); } /** @@ -490,7 +503,7 @@ private: * @param preserve_contents, tells if the new surface should be loaded from meory or left blank. **/ std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, - bool preserve_contents) { + bool preserve_contents, bool is_render) { const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; @@ -524,7 +537,7 @@ private: (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { if (s_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params); + return ManageStructuralMatch(current_surface, params, is_render); } else { return RebuildSurface(current_surface, params); } @@ -724,6 +737,8 @@ private: // Guards the cache for protection conflicts. bool guard_cache{}; + std::unordered_map siblings_table; + // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. 
This fits better for the purpose of this cache as textures are normaly // large in size. -- cgit v1.2.3 From 3dd76432141a5cbc97bed15788984b37e44aa4a5 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 10:39:45 -0400 Subject: texture_cache: Use siblings textures on Rebuild and fix possible error on blitting --- src/video_core/texture_cache/texture_cache.h | 33 +++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 022416706..201c4d42e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -220,7 +220,6 @@ public: } protected: - TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { @@ -233,6 +232,7 @@ protected: siblings_table[PixelFormat::Z32FS8] = PixelFormat::RG32F; siblings_table[PixelFormat::R16F] = PixelFormat::Z16; siblings_table[PixelFormat::R32F] = PixelFormat::Z32F; + siblings_table[PixelFormat::RG32F] = PixelFormat::Z32FS8; } ~TextureCache() = default; @@ -385,15 +385,27 @@ private: * @param current_surface, the registered surface in the cache which we want to convert. * @param params, the new surface params which we'll use to recreate the surface. 
**/ - std::pair RebuildSurface(TSurface current_surface, - const SurfaceParams& params) { + std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params, + bool is_render) { const auto gpu_addr = current_surface->GetGpuAddr(); - TSurface new_surface = GetUncachedSurface(gpu_addr, params); const auto& cr_params = current_surface->GetSurfaceParams(); - if (cr_params.type != params.type || (cr_params.component_type != params.component_type)) { + TSurface new_surface; + if (cr_params.pixel_format != params.pixel_format && !is_render && + siblings_table[cr_params.pixel_format] == params.pixel_format) { + SurfaceParams new_params = params; + new_params.pixel_format = cr_params.pixel_format; + new_params.component_type = cr_params.component_type; + new_params.type = cr_params.type; + new_surface = GetUncachedSurface(gpu_addr, new_params); + } else { + new_surface = GetUncachedSurface(gpu_addr, params); + } + const auto& final_params = new_surface->GetSurfaceParams(); + if (cr_params.type != final_params.type || + (cr_params.component_type != final_params.component_type)) { BufferCopy(current_surface, new_surface); } else { - std::vector bricks = current_surface->BreakDown(params); + std::vector bricks = current_surface->BreakDown(final_params); for (auto& brick : bricks) { ImageCopy(current_surface, new_surface, brick); } @@ -426,7 +438,7 @@ private: if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { return match_check(); } - return RebuildSurface(current_surface, params); + return RebuildSurface(current_surface, params, is_render); } return match_check(); } @@ -539,7 +551,7 @@ private: if (s_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params, is_render); } else { - return RebuildSurface(current_surface, params); + return RebuildSurface(current_surface, params, is_render); } } } @@ -599,7 +611,8 @@ private: new_params.width = wh; new_params.height = hh; 
new_params.pixel_format = params.pixel_format; - std::pair pair = RebuildSurface(current_surface, new_params); + std::pair pair = + RebuildSurface(current_surface, new_params, is_render); std::optional mirage_view = pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) @@ -616,7 +629,7 @@ private: } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { - return RebuildSurface(current_surface, params); + return RebuildSurface(current_surface, params, is_render); } } else { // If there are many overlaps, odds are they are subtextures of the candidate -- cgit v1.2.3 From 7232a1ed16e46715c29d781fb143bdf799090bec Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 16:41:16 -0400 Subject: decoders: correct block calculation --- src/video_core/texture_cache/texture_cache.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 201c4d42e..7a9b4c27d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -335,6 +335,9 @@ private: if (untopological == MatchTopologyResult::CompressUnmatch) { return RecycleStrategy::Flush; } + if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { + return RecycleStrategy::Flush; + } return RecycleStrategy::Ignore; } @@ -372,6 +375,11 @@ private: } return InitializeSurface(gpu_addr, params, preserve_contents); } + case RecycleStrategy::BufferCopy: { + auto new_surface = GetUncachedSurface(gpu_addr, params); + BufferCopy(overlaps[0], new_surface); + return {new_surface, new_surface->GetMainView()}; + } default: { UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); return InitializeSurface(gpu_addr, params, do_load); @@ -520,6 +528,10 @@ private: const auto 
host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + if (gpu_addr == 0x00000001682F0000ULL) { + LOG_CRITICAL(HW_GPU, "Here's the texture!"); + } + // Step 0: guarantee a valid surface if (!cache_addr) { // Return a null surface if it's invalid @@ -566,6 +578,10 @@ private: return InitializeSurface(gpu_addr, params, preserve_contents); } + if (!params.is_tiled) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); + } // Step 3 // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. If it fails -- cgit v1.2.3 From 03d489dcf5dbe13dff1ff788c609f964dd24019c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Jun 2019 15:41:28 -0400 Subject: texture_cache: Initialize all siblings to invalid pixel format. --- src/video_core/texture_cache/texture_cache.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7a9b4c27d..8213f434d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -227,12 +227,18 @@ protected: } SetEmptyDepthBuffer(); staging_cache.SetSize(2); - siblings_table[PixelFormat::Z16] = PixelFormat::R16F; - siblings_table[PixelFormat::Z32F] = PixelFormat::R32F; - siblings_table[PixelFormat::Z32FS8] = PixelFormat::RG32F; - siblings_table[PixelFormat::R16F] = PixelFormat::Z16; - siblings_table[PixelFormat::R32F] = PixelFormat::Z32F; - siblings_table[PixelFormat::RG32F] = PixelFormat::Z32FS8; + auto make_siblings = ([this](PixelFormat a, PixelFormat b) { + siblings_table[a] = b; + siblings_table[b] = a; + }); + const u32 max_formats = static_cast(PixelFormat::Max); + siblings_table.reserve(max_formats); + for (u32 i = 0; i 
< max_formats; i++) { + siblings_table[static_cast(i)] = PixelFormat::Invalid; + } + make_siblings(PixelFormat::Z16, PixelFormat::R16F); + make_siblings(PixelFormat::Z32F, PixelFormat::R32F); + make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); } ~TextureCache() = default; @@ -766,6 +772,9 @@ private: // Guards the cache for protection conflicts. bool guard_cache{}; + // The siblings table is for formats that can inter exchange with one another + // without causing issues. This is only valid when a conflict occurs on a non + // rendering use. std::unordered_map siblings_table; // The internal Cache is different for the Texture Cache. It's based on buckets -- cgit v1.2.3 From 082740d34db0996a0af73d7680c57e1abb31c712 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Jun 2019 16:40:04 -0400 Subject: surface: Correct format S8Z24 --- src/video_core/texture_cache/texture_cache.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8213f434d..a9e61cba1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -534,10 +534,6 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; - if (gpu_addr == 0x00000001682F0000ULL) { - LOG_CRITICAL(HW_GPU, "Here's the texture!"); - } - // Step 0: guarantee a valid surface if (!cache_addr) { // Return a null surface if it's invalid -- cgit v1.2.3 From d7587842eb404a52eb75a12816028f0706821dd0 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 15 Jun 2019 13:22:57 -0400 Subject: texture_cache: Implement texception detection and texture barriers. 
--- src/video_core/texture_cache/texture_cache.h | 37 ++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 5 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a9e61cba1..353fa4e31 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -70,8 +70,12 @@ public: * `Guard` guarantees that rendertargets don't unregister themselves if the * collide. Protection is currently only done on 3D slices. **/ - void Guard(bool new_guard) { - guard_cache = new_guard; + void GuardRenderTargets(bool new_guard) { + guard_render_targets = new_guard; + } + + void GuardSamplers(bool new_guard) { + guard_samplers = new_guard; } void FlushRegion(CacheAddr addr, std::size_t size) { @@ -98,7 +102,25 @@ public: return {}; } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; - return GetSurface(gpu_addr, params, true, false).second; + auto pair = GetSurface(gpu_addr, params, true, false); + if (guard_samplers) { + if (sampled_textures_stack_pointer == sampled_textures_stack.size()) { + sampled_textures_stack.resize(sampled_textures_stack.size() * 2); + } + sampled_textures_stack[sampled_textures_stack_pointer] = pair.first; + sampled_textures_stack_pointer++; + } + return pair.second; + } + + bool TextureBarrier() { + bool must_do = false; + for (u32 i = 0; i < sampled_textures_stack_pointer; i++) { + must_do |= sampled_textures_stack[i]->IsRenderTarget(); + sampled_textures_stack[i] = nullptr; + } + sampled_textures_stack_pointer = 0; + return must_do; } TView GetDepthBufferSurface(bool preserve_contents) { @@ -239,6 +261,7 @@ protected: make_siblings(PixelFormat::Z16, PixelFormat::R16F); make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); + sampled_textures_stack.resize(64); } ~TextureCache() = default; @@ -275,7 
+298,7 @@ protected: } void Unregister(TSurface surface) { - if (guard_cache && surface->IsProtected()) { + if (guard_render_targets && surface->IsProtected()) { return; } const GPUVAddr gpu_addr = surface->GetGpuAddr(); @@ -766,7 +789,8 @@ private: u64 ticks{}; // Guards the cache for protection conflicts. - bool guard_cache{}; + bool guard_render_targets{}; + bool guard_samplers{}; // The siblings table is for formats that can inter exchange with one another // without causing issues. This is only valid when a conflict occurs on a non @@ -792,6 +816,9 @@ private: render_targets; FramebufferTargetInfo depth_buffer; + std::vector sampled_textures_stack{}; + u32 sampled_textures_stack_pointer{}; + StagingCache staging_cache; std::recursive_mutex mutex; }; -- cgit v1.2.3 From 6acdae0e4c9d0c20f668cd86250b5d5b0dbd70c4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 17 Jun 2019 19:19:47 -0400 Subject: texture_cache: Correct format R16U as sibling --- src/video_core/texture_cache/texture_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 353fa4e31..78821503e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -258,7 +258,7 @@ protected: for (u32 i = 0; i < max_formats; i++) { siblings_table[static_cast(i)] = PixelFormat::Invalid; } - make_siblings(PixelFormat::Z16, PixelFormat::R16F); + make_siblings(PixelFormat::Z16, PixelFormat::R16U); make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); sampled_textures_stack.resize(64); -- cgit v1.2.3 From 97c8c9f49a3327f8f38dd460951071630c3e26fa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 20 Jun 2019 14:58:32 -0400 Subject: texture_cache: Eliminate linear textures fallthrough --- 
src/video_core/texture_cache/texture_cache.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 78821503e..d86ddeb76 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -603,10 +603,6 @@ private: return InitializeSurface(gpu_addr, params, preserve_contents); } - if (!params.is_tiled) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } // Step 3 // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. If it fails -- cgit v1.2.3 From d1812316e1b0f03af2ba10d4fe04be728e72725c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 20 Jun 2019 21:22:20 -0400 Subject: texture_cache: Style and Corrections --- src/video_core/texture_cache/texture_cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d86ddeb76..b720856f2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -18,6 +18,7 @@ #include "common/common_types.h" #include "common/math_util.h" #include "core/memory.h" +#include "core/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" -- cgit v1.2.3 From 7565389700a5741460a118d1fcc5e14fccb4b413 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:15:57 -0300 Subject: texture_cache: Include "core/core.h" --- src/video_core/texture_cache/texture_cache.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git 
a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b720856f2..a91b2a220 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -17,6 +17,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/math_util.h" +#include "core/core.h" #include "core/memory.h" #include "core/settings.h" #include "video_core/engines/fermi_2d.h" @@ -30,10 +31,6 @@ #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" -namespace Core { -class System; -} - namespace Tegra::Texture { struct FullTextureInfo; } -- cgit v1.2.3 From 58c8a44e7aa18f768db39a36870d8b279257e1d8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 17:26:00 -0400 Subject: texture_cache: Query MemoryManager from the system --- src/video_core/texture_cache/texture_cache.h | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a91b2a220..1516fcea3 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -52,10 +52,6 @@ class TextureCache { using IntervalType = typename IntervalMap::interval_type; public: - void InitMemoryMananger(Tegra::MemoryManager& memory_manager) { - this->memory_manager = &memory_manager; - } - void InvalidateRegion(CacheAddr addr, std::size_t size) { std::lock_guard lock{mutex}; @@ -278,15 +274,16 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); + const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); - const std::optional cpu_addr = 
memory_manager->GpuToCpuAddress(gpu_addr); + const std::optional cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); if (!cache_ptr || !cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); return; } - bool continuouty = memory_manager->IsBlockContinuous(gpu_addr, size); + bool continuouty = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); surface->MarkAsContinuous(continuouty); surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); @@ -552,7 +549,7 @@ private: std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { - const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; + const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; // Step 0: guarantee a valid surface @@ -693,7 +690,7 @@ private: void LoadSurface(const TSurface& surface) { staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(*memory_manager, staging_cache); + surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache); surface->UploadTexture(staging_cache.GetBuffer(0)); surface->MarkAsModified(false, Tick()); } @@ -704,7 +701,7 @@ private: } staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); surface->DownloadTexture(staging_cache.GetBuffer(0)); - surface->FlushBuffer(*memory_manager, staging_cache); + surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache); surface->MarkAsModified(false, Tick()); } @@ -778,7 +775,6 @@ private: }; VideoCore::RasterizerInterface& rasterizer; - Tegra::MemoryManager* memory_manager; u64 ticks{}; -- cgit v1.2.3 From 88bc39374fd7cffd2864229ae60bdab3aebb37ea Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 18:36:19 -0400 Subject: texture_cache: Corrections, documentation and asserts --- src/video_core/texture_cache/texture_cache.h | 84 ++++++++++++++-------------- 1 
file changed, 42 insertions(+), 42 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1516fcea3..fb6ca41ff 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -60,10 +60,10 @@ public: } } - /** + /*** * `Guard` guarantees that rendertargets don't unregister themselves if the * collide. Protection is currently only done on 3D slices. - **/ + ***/ void GuardRenderTargets(bool new_guard) { guard_render_targets = new_guard; } @@ -191,19 +191,21 @@ public: } void SetEmptyDepthBuffer() { - if (depth_buffer.target != nullptr) { - depth_buffer.target->MarkAsRenderTarget(false); - depth_buffer.target = nullptr; - depth_buffer.view = nullptr; + if (depth_buffer.target == nullptr) { + return; } + depth_buffer.target->MarkAsRenderTarget(false); + depth_buffer.target = nullptr; + depth_buffer.view = nullptr; } void SetEmptyColorBuffer(std::size_t index) { - if (render_targets[index].target != nullptr) { - render_targets[index].target->MarkAsRenderTarget(false); - render_targets[index].target = nullptr; - render_targets[index].view = nullptr; + if (render_targets[index].target == nullptr) { + return; } + render_targets[index].target->MarkAsRenderTarget(false); + render_targets[index].target = nullptr; + render_targets[index].view = nullptr; } void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, @@ -283,8 +285,8 @@ protected: gpu_addr); return; } - bool continuouty = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); - surface->MarkAsContinuous(continuouty); + const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); + surface->MarkAsContinuous(continuous); surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); RegisterInnerCache(surface); @@ -381,8 +383,8 @@ private: const SurfaceParams& params, const GPUVAddr 
gpu_addr, const bool preserve_contents, const MatchTopologyResult untopological) { - const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; - for (auto surface : overlaps) { + const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation; + for (auto& surface : overlaps) { Unregister(surface); } switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { @@ -394,7 +396,7 @@ private: [](const TSurface& a, const TSurface& b) -> bool { return a->GetModificationTick() < b->GetModificationTick(); }); - for (auto surface : overlaps) { + for (auto& surface : overlaps) { FlushSurface(surface); } return InitializeSurface(gpu_addr, params, preserve_contents); @@ -460,19 +462,19 @@ private: const SurfaceParams& params, bool is_render) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); const bool matches_target = current_surface->MatchTarget(params.target); - auto match_check = ([&]() -> std::pair { + const auto match_check = ([&]() -> std::pair { if (matches_target) { return {current_surface, current_surface->GetMainView()}; } return {current_surface, current_surface->EmplaceOverview(params)}; }); - if (is_mirage) { - if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { - return match_check(); - } - return RebuildSurface(current_surface, params, is_render); + if (!is_mirage) { + return match_check(); + } + if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { + return match_check(); } - return match_check(); + return RebuildSurface(current_surface, params, is_render); } /** @@ -493,7 +495,7 @@ private: bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); u32 passed_tests = 0; - for (auto surface : overlaps) { + for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); if (src_params.is_layered || src_params.num_levels > 1) { // We send this cases to 
recycle as they are more complex to handle @@ -504,8 +506,7 @@ private: if (!mipmap_layer) { continue; } - const u32 layer{mipmap_layer->first}; - const u32 mipmap{mipmap_layer->second}; + const auto [layer, mipmap] = *mipmap_layer; if (new_surface->GetMipmapSize(mipmap) != candidate_size) { continue; } @@ -519,7 +520,7 @@ private: } if (passed_tests == 0) { return {}; - // In Accurate GPU all test should pass, else we recycle + // In Accurate GPU all tests should pass, else we recycle } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { return {}; } @@ -548,7 +549,6 @@ private: **/ std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; @@ -570,17 +570,17 @@ private: auto iter = l1_cache.find(cache_addr); if (iter != l1_cache.end()) { TSurface& current_surface = iter->second; - auto topological_result = current_surface->MatchesTopology(params); + const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } - MatchStructureResult s_result = current_surface->MatchesStructure(params); - if (s_result != MatchStructureResult::None && + const auto struct_result = current_surface->MatchesStructure(params); + if (struct_result != MatchStructureResult::None && (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { - if (s_result == MatchStructureResult::FullMatch) { + if (struct_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params, is_render); } else { return RebuildSurface(current_surface, params, is_render); @@ -602,8 +602,8 @@ private: // Now we need to figure the 
relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. If it fails // inmediatly recycle the texture - for (auto surface : overlaps) { - auto topological_result = surface->MatchesTopology(params); + for (const auto& surface : overlaps) { + const auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); @@ -620,7 +620,7 @@ private: if (current_surface->GetGpuAddr() == gpu_addr) { std::optional> view = TryReconstructSurface(overlaps, params, gpu_addr); - if (view.has_value()) { + if (view) { return *view; } } @@ -630,7 +630,7 @@ private: // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = current_surface->EmplaceView(params, gpu_addr, candidate_size); - if (view.has_value()) { + if (view) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { // On a mirage view, we need to recreate the surface under this new view @@ -669,7 +669,7 @@ private: // using the overlaps. If a single overlap fails, this will fail. 
std::optional> view = TryReconstructSurface(overlaps, params, gpu_addr); - if (view.has_value()) { + if (view) { return *view; } } @@ -738,16 +738,16 @@ private: std::vector surfaces; while (start <= end) { std::vector& list = registry[start]; - for (auto& s : list) { - if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { - s->MarkAsPicked(true); - surfaces.push_back(s); + for (auto& surface : list) { + if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { + surface->MarkAsPicked(true); + surfaces.push_back(surface); } } start++; } - for (auto& s : surfaces) { - s->MarkAsPicked(false); + for (auto& surface : surfaces) { + surface->MarkAsPicked(false); } return surfaces; } -- cgit v1.2.3 From 223ca8075399463e51d4afea1adb0c5b6fba8588 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 19:35:08 -0400 Subject: texture_cache: Correct variable naming. --- src/video_core/texture_cache/texture_cache.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fb6ca41ff..b5b0e91ef 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -225,9 +225,9 @@ public: } const CacheAddr page = cache_addr >> registry_page_bits; std::vector& list = registry[page]; - for (auto& s : list) { - if (s->GetCacheAddr() == cache_addr) { - return s; + for (auto& surface : list) { + if (surface->GetCacheAddr() == cache_addr) { + return surface; } } return nullptr; -- cgit v1.2.3 From 3f3c3ca5f96fd5742524703f20b531338fa2e5f7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 17:29:39 -0300 Subject: texture_cache: Address feedback --- src/video_core/texture_cache/texture_cache.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 
'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b5b0e91ef..9436a5ff2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -79,10 +79,9 @@ public: if (surfaces.empty()) { return; } - std::sort(surfaces.begin(), surfaces.end(), - [](const TSurface& a, const TSurface& b) -> bool { - return a->GetModificationTick() < b->GetModificationTick(); - }); + std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { + return a->GetModificationTick() < b->GetModificationTick(); + }); for (const auto& surface : surfaces) { FlushSurface(surface); } @@ -181,13 +180,15 @@ public: } void MarkColorBufferInUse(std::size_t index) { - if (render_targets[index].target) - render_targets[index].target->MarkAsModified(true, Tick()); + if (auto& render_target = render_targets[index].target) { + render_target->MarkAsModified(true, Tick()); + } } void MarkDepthBufferInUse() { - if (depth_buffer.target) + if (depth_buffer.target) { depth_buffer.target->MarkAsModified(true, Tick()); + } } void SetEmptyDepthBuffer() { @@ -245,11 +246,11 @@ protected: } SetEmptyDepthBuffer(); staging_cache.SetSize(2); - auto make_siblings = ([this](PixelFormat a, PixelFormat b) { + const auto make_siblings = [this](PixelFormat a, PixelFormat b) { siblings_table[a] = b; siblings_table[b] = a; - }); - const u32 max_formats = static_cast(PixelFormat::Max); + }; + const auto max_formats = static_cast(PixelFormat::Max); siblings_table.reserve(max_formats); for (u32 i = 0; i < max_formats; i++) { siblings_table[static_cast(i)] = PixelFormat::Invalid; -- cgit v1.2.3 From dd9ace502bfd2239ceddad8c5c41baf0e10e2144 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 18:54:13 -0300 Subject: texture_cache: Use std::array for siblings_table --- src/video_core/texture_cache/texture_cache.h | 23 +++++++++++++---------- 1 
file changed, 13 insertions(+), 10 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9436a5ff2..9fcf87744 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,6 +4,8 @@ #pragma once +#include +#include #include #include #include @@ -244,20 +246,19 @@ protected: for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { SetEmptyColorBuffer(i); } + SetEmptyDepthBuffer(); staging_cache.SetSize(2); + const auto make_siblings = [this](PixelFormat a, PixelFormat b) { - siblings_table[a] = b; - siblings_table[b] = a; + siblings_table[static_cast(a)] = b; + siblings_table[static_cast(b)] = a; }; - const auto max_formats = static_cast(PixelFormat::Max); - siblings_table.reserve(max_formats); - for (u32 i = 0; i < max_formats; i++) { - siblings_table[static_cast(i)] = PixelFormat::Invalid; - } + std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); make_siblings(PixelFormat::Z16, PixelFormat::R16U); make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); + sampled_textures_stack.resize(64); } @@ -426,7 +427,8 @@ private: const auto& cr_params = current_surface->GetSurfaceParams(); TSurface new_surface; if (cr_params.pixel_format != params.pixel_format && !is_render && - siblings_table[cr_params.pixel_format] == params.pixel_format) { + siblings_table[static_cast(cr_params.pixel_format)] == + params.pixel_format) { SurfaceParams new_params = params; new_params.pixel_format = cr_params.pixel_format; new_params.component_type = cr_params.component_type; @@ -472,7 +474,8 @@ private: if (!is_mirage) { return match_check(); } - if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { + if (!is_render && 
siblings_table[static_cast(current_surface->GetFormat())] == + params.pixel_format) { return match_check(); } return RebuildSurface(current_surface, params, is_render); @@ -786,7 +789,7 @@ private: // The siblings table is for formats that can inter exchange with one another // without causing issues. This is only valid when a conflict occurs on a non // rendering use. - std::unordered_map siblings_table; + std::array(PixelFormat::Max)> siblings_table; // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly -- cgit v1.2.3 From f6f1a8f26a302dc33df635625c490f0d65880059 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 19:52:37 -0300 Subject: texture_cache: Style changes --- src/video_core/texture_cache/texture_cache.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9fcf87744..3df3e17dd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -571,8 +571,7 @@ private: // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. 
- auto iter = l1_cache.find(cache_addr); - if (iter != l1_cache.end()) { + if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { -- cgit v1.2.3 From 8eae66907e043e6e26d78cfc4b5cde7ea93a4f77 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 20:10:31 -0300 Subject: texture_cache: Use std::vector reservation for sampled_textures --- src/video_core/texture_cache/texture_cache.h | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3df3e17dd..8edae3d97 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -97,25 +97,19 @@ public: return {}; } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; - auto pair = GetSurface(gpu_addr, params, true, false); + const auto [surface, view] = GetSurface(gpu_addr, params, true, false); if (guard_samplers) { - if (sampled_textures_stack_pointer == sampled_textures_stack.size()) { - sampled_textures_stack.resize(sampled_textures_stack.size() * 2); - } - sampled_textures_stack[sampled_textures_stack_pointer] = pair.first; - sampled_textures_stack_pointer++; + sampled_textures.push_back(surface); } - return pair.second; + return view; } bool TextureBarrier() { - bool must_do = false; - for (u32 i = 0; i < sampled_textures_stack_pointer; i++) { - must_do |= sampled_textures_stack[i]->IsRenderTarget(); - sampled_textures_stack[i] = nullptr; - } - sampled_textures_stack_pointer = 0; - return must_do; + const bool any_rt = + std::any_of(sampled_textures.begin(), sampled_textures.end(), + [](const auto& surface) { return surface->IsRenderTarget(); }); + 
sampled_textures.clear(); + return any_rt; } TView GetDepthBufferSurface(bool preserve_contents) { @@ -259,7 +253,7 @@ protected: make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); - sampled_textures_stack.resize(64); + sampled_textures.reserve(64); } ~TextureCache() = default; @@ -809,8 +803,7 @@ private: render_targets; FramebufferTargetInfo depth_buffer; - std::vector sampled_textures_stack{}; - u32 sampled_textures_stack_pointer{}; + std::vector sampled_textures; StagingCache staging_cache; std::recursive_mutex mutex; -- cgit v1.2.3 From 6e1db6b7038329a9716763c8bdf14cc5b578fec1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 20:47:46 -0300 Subject: texture_cache: Pack sibling queries inside a method --- src/video_core/texture_cache/texture_cache.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8edae3d97..c9e72531a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -421,8 +421,7 @@ private: const auto& cr_params = current_surface->GetSurfaceParams(); TSurface new_surface; if (cr_params.pixel_format != params.pixel_format && !is_render && - siblings_table[static_cast(cr_params.pixel_format)] == - params.pixel_format) { + GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { SurfaceParams new_params = params; new_params.pixel_format = cr_params.pixel_format; new_params.component_type = cr_params.component_type; @@ -459,17 +458,16 @@ private: const SurfaceParams& params, bool is_render) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); const bool matches_target = current_surface->MatchTarget(params.target); - const auto match_check = ([&]() -> std::pair { + const auto match_check = [&]() -> std::pair { 
if (matches_target) { return {current_surface, current_surface->GetMainView()}; } return {current_surface, current_surface->EmplaceOverview(params)}; - }); + }; if (!is_mirage) { return match_check(); } - if (!is_render && siblings_table[static_cast(current_surface->GetFormat())] == - params.pixel_format) { + if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { return match_check(); } return RebuildSurface(current_surface, params, is_render); @@ -766,6 +764,10 @@ private: return {}; } + constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { + return siblings_table[static_cast(format)]; + } + struct FramebufferTargetInfo { TSurface target; TView view; -- cgit v1.2.3