From d2b25575426b9b52049b88d8d6d9ae83c81da312 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 27 May 2020 17:31:12 -0300 Subject: texture_cache: Use small vector for surface vectors This avoids most heap allocations when collecting surfaces into a vector. --- src/video_core/texture_cache/texture_cache.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d6efc34b2..d7e42697d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; template class TextureCache { + using VectorSurface = boost::container::small_vector; public: void InvalidateRegion(VAddr addr, std::size_t size) { @@ -498,7 +500,7 @@ private: * @param untopological Indicates to the recycler that the texture has no way * to match the overlaps due to topological reasons. **/ - RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, + RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { if (Settings::IsGPULevelExtreme()) { return RecycleStrategy::Flush; @@ -538,9 +540,8 @@ private: * @param untopological Indicates to the recycler that the texture has no way to match the * overlaps due to topological reasons. 
**/ - std::pair RecycleSurface(std::vector& overlaps, - const SurfaceParams& params, const GPUVAddr gpu_addr, - const bool preserve_contents, + std::pair RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, + const GPUVAddr gpu_addr, const bool preserve_contents, const MatchTopologyResult untopological) { const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); for (auto& surface : overlaps) { @@ -650,7 +651,7 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. **/ - std::optional> TryReconstructSurface(std::vector& overlaps, + std::optional> TryReconstructSurface(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { @@ -708,7 +709,7 @@ private: * @param preserve_contents Indicates that the new surface should be loaded from memory or * left blank. */ - std::optional> Manage3DSurfaces(std::vector& overlaps, + std::optional> Manage3DSurfaces(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const VAddr cpu_addr, @@ -810,7 +811,7 @@ private: TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - std::vector overlaps{current_surface}; + VectorSurface overlaps{current_surface}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } @@ -1124,14 +1125,14 @@ private: } } - std::vector GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { + VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { if (size == 0) { return {}; } const VAddr cpu_addr_end = cpu_addr + size; VAddr start = cpu_addr >> registry_page_bits; const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; - std::vector surfaces; + VectorSurface surfaces; while (start <= end) { std::vector& list = 
registry[start]; for (auto& surface : list) { -- cgit v1.2.3 From b8b6f94ba9a662857c40d819ac40755c7984cb16 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 27 May 2020 17:59:04 -0300 Subject: texture_cache: Use unordered_map::find instead of operator[] on hot code --- src/video_core/texture_cache/texture_cache.h | 34 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d7e42697d..99f74e6c4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -310,18 +310,20 @@ public: dst_surface.first->MarkAsModified(true, Tick()); } - TSurface TryFindFramebufferSurface(VAddr addr) { + TSurface TryFindFramebufferSurface(VAddr addr) const { if (!addr) { return nullptr; } const VAddr page = addr >> registry_page_bits; - std::vector& list = registry[page]; - for (auto& surface : list) { - if (surface->GetCpuAddr() == addr) { - return surface; - } + const auto it = registry.find(page); + if (it == registry.end()) { + return nullptr; } - return nullptr; + const auto& list = it->second; + const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { + return surface->GetCpuAddr() == addr; + }); + return found != list.end() ? 
*found : nullptr; } u64 Tick() { @@ -1130,18 +1132,20 @@ private: return {}; } const VAddr cpu_addr_end = cpu_addr + size; - VAddr start = cpu_addr >> registry_page_bits; const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; VectorSurface surfaces; - while (start <= end) { - std::vector& list = registry[start]; - for (auto& surface : list) { - if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { - surface->MarkAsPicked(true); - surfaces.push_back(surface); + for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { + const auto it = registry.find(start); + if (it == registry.end()) { + continue; + } + for (auto& surface : it->second) { + if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { + continue; } + surface->MarkAsPicked(true); + surfaces.push_back(surface); } - start++; } for (auto& surface : surfaces) { surface->MarkAsPicked(false); -- cgit v1.2.3 From 5b37cecd76205612bfc2cc1d0b475d893fe7ee6a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 01:48:01 -0300 Subject: texture_cache: Handle overlaps with multiple subresources Implement more surface reconstruct cases. Allow overlaps with more than one layer and mipmap, and copy all of them to the new texture. 
- Fixes textures moving around objects on Xenoblade games --- src/video_core/texture_cache/texture_cache.h | 60 +++++++++++++++------------- 1 file changed, 33 insertions(+), 27 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8bfc541d4..658264860 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -652,45 +652,54 @@ private: **/ std::optional> TryReconstructSurface(std::vector& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr) { + GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { - return {}; + return std::nullopt; } - bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); - u32 passed_tests = 0; + std::size_t passed_tests = 0; + bool modified = false; + for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.is_layered || src_params.num_levels > 1) { - // We send this cases to recycle as they are more complex to handle - return {}; - } - const std::size_t candidate_size = surface->GetSizeInBytes(); - auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; + const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { continue; } - const auto [layer, mipmap] = *mipmap_layer; - if (new_surface->GetMipmapSize(mipmap) != candidate_size) { + const auto [base_layer, base_mipmap] = *mipmap_layer; + if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { continue; } + + // Copy all mipmaps and layers + const u32 block_width = params.GetDefaultBlockWidth(); + const u32 block_height = params.GetDefaultBlockHeight(); + for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { + const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); + 
const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); + if (width < block_width || height < block_height) { + // Current APIs forbid copying small compressed textures, avoid errors + break; + } + const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, + src_params.depth); + ImageCopy(surface, new_surface, copy_params); + } + ++passed_tests; modified |= surface->IsModified(); - // Now we got all the data set up - const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); - const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); - const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); - passed_tests++; - ImageCopy(surface, new_surface, copy_params); } if (passed_tests == 0) { - return {}; + return std::nullopt; + } + if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { // In Accurate GPU all tests should pass, else we recycle - } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { - return {}; + return std::nullopt; } + for (const auto& surface : overlaps) { Unregister(surface); } + new_surface->MarkAsModified(modified, Tick()); Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; @@ -868,12 +877,9 @@ private: // two things either the candidate surface is a supertexture of the overlap // or they don't match in any known way. 
if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { - if (current_surface->GetGpuAddr() == gpu_addr) { - std::optional> view = - TryReconstructSurface(overlaps, params, gpu_addr); - if (view) { - return *view; - } + const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); + if (view) { + return *view; } return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); -- cgit v1.2.3 From dd70e097ccb84b64983456759525d650d1ceab0a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 20:10:58 -0300 Subject: texture_cache: Reload textures when number of resources mismatch --- src/video_core/texture_cache/texture_cache.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 658264860..62206b906 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -660,6 +660,15 @@ private: std::size_t passed_tests = 0; bool modified = false; + u32 num_resources = 0; + for (auto& surface : overlaps) { + const SurfaceParams& src_params = surface->GetSurfaceParams(); + num_resources += src_params.depth * src_params.num_levels; + } + if (num_resources != params.depth * params.num_levels) { + LoadSurface(new_surface); + } + for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; -- cgit v1.2.3 From e454f7e7a7d75fe0415ce48ffbd5d5979d79ce67 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 20:12:46 -0300 Subject: texture_cache: Only copy textures that were modified from host --- src/video_core/texture_cache/texture_cache.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git 
a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 62206b906..3e024a098 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -679,6 +679,12 @@ private: if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { continue; } + ++passed_tests; + + if (!surface->IsModified()) { + continue; + } + modified = true; // Copy all mipmaps and layers const u32 block_width = params.GetDefaultBlockWidth(); @@ -694,8 +700,6 @@ private: src_params.depth); ImageCopy(surface, new_surface, copy_params); } - ++passed_tests; - modified |= surface->IsModified(); } if (passed_tests == 0) { return std::nullopt; -- cgit v1.2.3 From 1ee1a5d3d64379bf2463c072a32af8e64a8c14cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 23:52:47 -0300 Subject: texture_cache: More relaxed reconstruction Only reupload textures when they've not been modified from the GPU. --- src/video_core/texture_cache/texture_cache.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3e024a098..4ba0d2c3a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -656,19 +656,19 @@ private: if (params.target == SurfaceTarget::Texture3D) { return std::nullopt; } + const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; TSurface new_surface = GetUncachedSurface(gpu_addr, params); - std::size_t passed_tests = 0; - bool modified = false; - u32 num_resources = 0; - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - num_resources += src_params.depth * src_params.num_levels; - } - if (num_resources != params.depth * params.num_levels) { + if 
(std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { LoadSurface(new_surface); + for (const auto& surface : overlaps) { + Unregister(surface); + } + Register(new_surface); + return {{new_surface, new_surface->GetMainView()}}; } + std::size_t passed_tests = 0; for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; @@ -681,11 +681,6 @@ private: } ++passed_tests; - if (!surface->IsModified()) { - continue; - } - modified = true; - // Copy all mipmaps and layers const u32 block_width = params.GetDefaultBlockWidth(); const u32 block_height = params.GetDefaultBlockHeight(); @@ -709,6 +704,7 @@ private: return std::nullopt; } + const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); for (const auto& surface : overlaps) { Unregister(surface); } -- cgit v1.2.3 From c95c254f3eda75476ad221a4828033f4140a3470 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 23:32:41 -0300 Subject: texture_cache: Implement rendering to 3D textures This allows rendering to 3D textures with more than one slice. Applications are allowed to render to more than one slice of a texture using gl_Layer from a VTG shader. This also requires reworking how 3D texture collisions are handled. For now, this commit allows rendering to slices but not to miplevels. When a render target attempts to write to a mipmap, we fall back to the previous implementation (copying or flushing as needed). - Fixes color correction 3D textures on UE4 games (rainbow effects). - Allows Xenoblade games to render to 3D textures directly. 
--- src/video_core/texture_cache/texture_cache.h | 119 ++++++++++++--------------- 1 file changed, 51 insertions(+), 68 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 6f63217a2..4ee0d76b9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -508,12 +508,12 @@ private: return RecycleStrategy::Flush; } // 3D Textures decision - if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { + if (params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; } for (const auto& s : overlaps) { const auto& s_params = s->GetSurfaceParams(); - if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { + if (s_params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; } } @@ -726,76 +726,60 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. * @param cpu_addr The starting address of the new surface on physical memory. - * @param preserve_contents Indicates that the new surface should be loaded from memory or - * left blank. 
*/ std::optional> Manage3DSurfaces(VectorSurface& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, - const VAddr cpu_addr, - bool preserve_contents) { - if (params.target == SurfaceTarget::Texture3D) { - bool failed = false; - if (params.num_levels > 1) { - // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach - return std::nullopt; - } - TSurface new_surface = GetUncachedSurface(gpu_addr, params); - bool modified = false; - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.target != SurfaceTarget::Texture2D) { - failed = true; - break; - } - if (src_params.height != params.height) { - failed = true; - break; - } - if (src_params.block_depth != params.block_depth || - src_params.block_height != params.block_height) { - failed = true; - break; + GPUVAddr gpu_addr, VAddr cpu_addr) { + if (params.num_levels > 1) { + // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach + return std::nullopt; + } + + if (overlaps.size() == 1) { + const auto& surface = overlaps[0]; + const SurfaceParams& overlap_params = surface->GetSurfaceParams(); + // Don't attempt to render to textures with more than one level for now + // The texture has to be to the right or the sample address if we want to render to it + if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { + const u32 offset = static_cast(cpu_addr - surface->GetCpuAddr()); + const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); + if (slice < overlap_params.depth) { + auto view = surface->Emplace3DView(slice, params.depth, 0, 1); + return std::make_pair(std::move(surface), std::move(view)); } - const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); - const auto offsets = params.GetBlockOffsetXYZ(offset); - const auto z = std::get<2>(offsets); - modified |= surface->IsModified(); - const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, 
params.height, - 1); - ImageCopy(surface, new_surface, copy_params); } - if (failed) { + } + + if (params.depth == 1) { + return std::nullopt; + } + + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + bool modified = false; + for (auto& surface : overlaps) { + const SurfaceParams& src_params = surface->GetSurfaceParams(); + if (src_params.height != params.height || + src_params.block_depth != params.block_depth || + src_params.block_height != params.block_height) { return std::nullopt; } - for (const auto& surface : overlaps) { - Unregister(surface); - } - new_surface->MarkAsModified(modified, Tick()); - Register(new_surface); - auto view = new_surface->GetMainView(); - return {{std::move(new_surface), view}}; - } else { - for (const auto& surface : overlaps) { - if (!surface->MatchTarget(params.target)) { - if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { - if (Settings::IsGPULevelExtreme()) { - return std::nullopt; - } - Unregister(surface); - return InitializeSurface(gpu_addr, params, preserve_contents); - } - return std::nullopt; - } - if (surface->GetCpuAddr() != cpu_addr) { - continue; - } - if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { - return {{surface, surface->GetMainView()}}; - } - } - return InitializeSurface(gpu_addr, params, preserve_contents); + modified |= surface->IsModified(); + + const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); + const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); + const u32 width = params.width; + const u32 height = params.height; + const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); + ImageCopy(surface, new_surface, copy_params); } + for (const auto& surface : overlaps) { + Unregister(surface); + } + new_surface->MarkAsModified(modified, Tick()); + Register(new_surface); + + auto view = new_surface->GetMainView(); + return std::make_pair(std::move(new_surface), std::move(view)); } /** @@ -873,10 +857,9 @@ private: 
} } - // Check if it's a 3D texture - if (params.block_depth > 0) { - auto surface = - Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); + // Manage 3D textures + if (params.target == SurfaceTarget::Texture3D) { + auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr); if (surface) { return *surface; } -- cgit v1.2.3 From 3c2ae53b4c574deb4f9afe3104c7d022c53c5281 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 1 Jun 2020 04:49:35 -0300 Subject: texture_cache: Handle 3D texture blits with one layer --- src/video_core/texture_cache/texture_cache.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4ee0d76b9..60b95a854 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -755,6 +755,8 @@ private: } TSurface new_surface = GetUncachedSurface(gpu_addr, params); + LoadSurface(new_surface); + bool modified = false; for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); @@ -763,7 +765,10 @@ private: src_params.block_height != params.block_height) { return std::nullopt; } - modified |= surface->IsModified(); + if (!surface->IsModified()) { + continue; + } + modified = true; const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); -- cgit v1.2.3 From c99f5d405b6a24603ff8174aeb2952facc4a92d9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Jun 2020 05:01:44 -0300 Subject: texture_cache: Simplify blit code --- src/video_core/texture_cache/texture_cache.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h 
b/src/video_core/texture_cache/texture_cache.h index 60b95a854..b19eeed66 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -298,15 +298,13 @@ public: const GPUVAddr src_gpu_addr = src_config.Address(); const GPUVAddr dst_gpu_addr = dst_config.Address(); DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); - const std::optional dst_cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); - const std::optional src_cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); - std::pair dst_surface = - GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); - std::pair src_surface = - GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false); - ImageBlit(src_surface.second, dst_surface.second, copy_config); + + const auto& memory_manager = system.GPU().MemoryManager(); + const std::optional dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr); + const std::optional src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr); + std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); + TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; + ImageBlit(src_surface, dst_surface.second, copy_config); dst_surface.first->MarkAsModified(true, Tick()); } -- cgit v1.2.3 From bd43c0547085fcfb585ac3a90521eeb8414fd538 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Jun 2020 05:02:22 -0300 Subject: texture_cache: Port original code management for 2D vs 3D textures Handle blits to images as 2D, even when they have block depth. 
- Fixes rendering issues on Luigi's Mansion 3 --- src/video_core/texture_cache/texture_cache.h | 49 +++++++++++++++++++--------- 1 file changed, 34 insertions(+), 15 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b19eeed66..b543fc8c0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -724,10 +724,35 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. * @param cpu_addr The starting address of the new surface on physical memory. + * @param preserve_contents Indicates that the new surface should be loaded from memory or + * left blank. */ std::optional> Manage3DSurfaces(VectorSurface& overlaps, const SurfaceParams& params, - GPUVAddr gpu_addr, VAddr cpu_addr) { + GPUVAddr gpu_addr, VAddr cpu_addr, + bool preserve_contents) { + if (params.target != SurfaceTarget::Texture3D) { + for (const auto& surface : overlaps) { + if (!surface->MatchTarget(params.target)) { + if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { + if (Settings::IsGPULevelExtreme()) { + return std::nullopt; + } + Unregister(surface); + return InitializeSurface(gpu_addr, params, preserve_contents); + } + return std::nullopt; + } + if (surface->GetCpuAddr() != cpu_addr) { + continue; + } + if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { + return std::make_pair(surface, surface->GetMainView()); + } + } + return InitializeSurface(gpu_addr, params, preserve_contents); + } + if (params.num_levels > 1) { // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach return std::nullopt; @@ -748,25 +773,18 @@ private: } } - if (params.depth == 1) { - return std::nullopt; - } - TSurface new_surface = GetUncachedSurface(gpu_addr, params); - LoadSurface(new_surface); - bool modified = false; + 
for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.height != params.height || + if (src_params.target != SurfaceTarget::Texture2D || + src_params.height != params.height || src_params.block_depth != params.block_depth || src_params.block_height != params.block_height) { return std::nullopt; } - if (!surface->IsModified()) { - continue; - } - modified = true; + modified |= surface->IsModified(); const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); @@ -781,7 +799,7 @@ private: new_surface->MarkAsModified(modified, Tick()); Register(new_surface); - auto view = new_surface->GetMainView(); + TView view = new_surface->GetMainView(); return std::make_pair(std::move(new_surface), std::move(view)); } @@ -861,8 +879,9 @@ private: } // Manage 3D textures - if (params.target == SurfaceTarget::Texture3D) { - auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr); + if (params.block_depth > 0) { + auto surface = + Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); if (surface) { return *surface; } -- cgit v1.2.3 From ef53b2fd08f1122f22456500bfdc707f1c18906c Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 23:13:48 -0400 Subject: texture_cache: Fix incorrect address used in a DeduceSurface() call Previously the source was being deduced twice in a row. 
--- src/video_core/texture_cache/texture_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b543fc8c0..85075e868 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1053,7 +1053,7 @@ private: void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { auto deduced_src = DeduceSurface(src_gpu_addr, src_params); - auto deduced_dst = DeduceSurface(src_gpu_addr, src_params); + auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); if (deduced_src.Failed() || deduced_dst.Failed()) { return; } -- cgit v1.2.3