From d2b25575426b9b52049b88d8d6d9ae83c81da312 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 27 May 2020 17:31:12 -0300 Subject: texture_cache: Use small vector for surface vectors This avoids most heap allocations when collecting surfaces into a vector. --- src/video_core/texture_cache/texture_cache.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d6efc34b2..d7e42697d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; template class TextureCache { + using VectorSurface = boost::container::small_vector; public: void InvalidateRegion(VAddr addr, std::size_t size) { @@ -498,7 +500,7 @@ private: * @param untopological Indicates to the recycler that the texture has no way * to match the overlaps due to topological reasons. **/ - RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, + RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { if (Settings::IsGPULevelExtreme()) { return RecycleStrategy::Flush; @@ -538,9 +540,8 @@ private: * @param untopological Indicates to the recycler that the texture has no way to match the * overlaps due to topological reasons. **/ - std::pair RecycleSurface(std::vector& overlaps, - const SurfaceParams& params, const GPUVAddr gpu_addr, - const bool preserve_contents, + std::pair RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, + const GPUVAddr gpu_addr, const bool preserve_contents, const MatchTopologyResult untopological) { const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); for (auto& surface : overlaps) { @@ -650,7 +651,7 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. **/ - std::optional> TryReconstructSurface(std::vector& overlaps, + std::optional> TryReconstructSurface(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { @@ -708,7 +709,7 @@ private: * @param preserve_contents Indicates that the new surface should be loaded from memory or * left blank. */ - std::optional> Manage3DSurfaces(std::vector& overlaps, + std::optional> Manage3DSurfaces(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const VAddr cpu_addr, @@ -810,7 +811,7 @@ private: TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - std::vector overlaps{current_surface}; + VectorSurface overlaps{current_surface}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } @@ -1124,14 +1125,14 @@ private: } } - std::vector GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { + VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { if (size == 0) { return {}; } const VAddr cpu_addr_end = cpu_addr + size; VAddr start = cpu_addr >> registry_page_bits; const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; - std::vector surfaces; + VectorSurface surfaces; while (start <= end) { std::vector& list = registry[start]; for (auto& surface : list) { -- cgit v1.2.3 From b8b6f94ba9a662857c40d819ac40755c7984cb16 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 27 May 2020 17:59:04 -0300 Subject: texture_cache: Use unordered_map::find instead of operator[] on hot code --- src/video_core/texture_cache/texture_cache.h | 34 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d7e42697d..99f74e6c4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -310,18 +310,20 @@ public: dst_surface.first->MarkAsModified(true, Tick()); } - TSurface TryFindFramebufferSurface(VAddr addr) { + TSurface TryFindFramebufferSurface(VAddr addr) const { if (!addr) { return nullptr; } const VAddr page = addr >> registry_page_bits; - std::vector& list = registry[page]; - for (auto& surface : list) { - if (surface->GetCpuAddr() == addr) { - return surface; - } + const auto it = registry.find(page); + if (it == registry.end()) { + return nullptr; } - return nullptr; + const auto& list = it->second; + const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { + return surface->GetCpuAddr() == addr; + }); + return found != list.end() ? *found : nullptr; } u64 Tick() { @@ -1130,18 +1132,20 @@ private: return {}; } const VAddr cpu_addr_end = cpu_addr + size; - VAddr start = cpu_addr >> registry_page_bits; const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; VectorSurface surfaces; - while (start <= end) { - std::vector& list = registry[start]; - for (auto& surface : list) { - if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { - surface->MarkAsPicked(true); - surfaces.push_back(surface); + for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { + const auto it = registry.find(start); + if (it == registry.end()) { + continue; + } + for (auto& surface : it->second) { + if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { + continue; } + surface->MarkAsPicked(true); + surfaces.push_back(surface); } - start++; } for (auto& surface : surfaces) { surface->MarkAsPicked(false); -- cgit v1.2.3 From fc153f6bcd9884064eb2752fc7be5a6458bbd71b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 28 May 2020 17:08:44 -0300 Subject: format_lookup_table: Implement G24S8 format as S8Z24 --- src/video_core/texture_cache/format_lookup_table.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7032e0059..f476f03b0 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -41,7 +41,7 @@ struct Table { ComponentType alpha_component; bool is_srgb; }; -constexpr std::array DefinitionTable = {{ +constexpr std::array DefinitionTable = {{ {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, @@ -98,6 +98,7 @@ constexpr std::array DefinitionTable = {{ {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, + {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, -- cgit v1.2.3 From 5b37cecd76205612bfc2cc1d0b475d893fe7ee6a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 01:48:01 -0300 Subject: texture_cache: Handle overlaps with multiple subresources Implement more surface reconstruct cases. Allow overlaps with more than one layer and mipmap and copies all of them to the new texture. - Fixes textures moving around objects on Xenoblade games --- src/video_core/texture_cache/texture_cache.h | 60 +++++++++++++++------------- 1 file changed, 33 insertions(+), 27 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8bfc541d4..658264860 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -652,45 +652,54 @@ private: **/ std::optional> TryReconstructSurface(std::vector& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr) { + GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { - return {}; + return std::nullopt; } - bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); - u32 passed_tests = 0; + std::size_t passed_tests = 0; + bool modified = false; + for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.is_layered || src_params.num_levels > 1) { - // We send this cases to recycle as they are more complex to handle - return {}; - } - const std::size_t candidate_size = surface->GetSizeInBytes(); - auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; + const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { continue; } - const auto [layer, mipmap] = *mipmap_layer; - if (new_surface->GetMipmapSize(mipmap) != candidate_size) { + const auto [base_layer, base_mipmap] = *mipmap_layer; + if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { continue; } + + // Copy all mipmaps and layers + const u32 block_width = params.GetDefaultBlockWidth(); + const u32 block_height = params.GetDefaultBlockHeight(); + for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { + const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); + const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); + if (width < block_width || height < block_height) { + // Current APIs forbid copying small compressed textures, avoid errors + break; + } + const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, + src_params.depth); + ImageCopy(surface, new_surface, copy_params); + } + ++passed_tests; modified |= surface->IsModified(); - // Now we got all the data set up - const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); - const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); - const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); - passed_tests++; - ImageCopy(surface, new_surface, copy_params); } if (passed_tests == 0) { - return {}; + return std::nullopt; + } + if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { // In Accurate GPU all tests should pass, else we recycle - } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { - return {}; + return std::nullopt; } + for (const auto& surface : overlaps) { Unregister(surface); } + new_surface->MarkAsModified(modified, Tick()); Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; @@ -868,12 +877,9 @@ private: // two things either the candidate surface is a supertexture of the overlap // or they don't match in any known way. if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { - if (current_surface->GetGpuAddr() == gpu_addr) { - std::optional> view = - TryReconstructSurface(overlaps, params, gpu_addr); - if (view) { - return *view; - } + const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); + if (view) { + return *view; } return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); -- cgit v1.2.3 From dd70e097ccb84b64983456759525d650d1ceab0a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 20:10:58 -0300 Subject: texture_cache: Reload textures when number of resources mismatch --- src/video_core/texture_cache/texture_cache.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 658264860..62206b906 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -660,6 +660,15 @@ private: std::size_t passed_tests = 0; bool modified = false; + u32 num_resources = 0; + for (auto& surface : overlaps) { + const SurfaceParams& src_params = surface->GetSurfaceParams(); + num_resources += src_params.depth * src_params.num_levels; + } + if (num_resources != params.depth * params.num_levels) { + LoadSurface(new_surface); + } + for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; -- cgit v1.2.3 From e454f7e7a7d75fe0415ce48ffbd5d5979d79ce67 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 20:12:46 -0300 Subject: texture_cache: Only copy textures that were modified from host --- src/video_core/texture_cache/texture_cache.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 62206b906..3e024a098 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -679,6 +679,12 @@ private: if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { continue; } + ++passed_tests; + + if (!surface->IsModified()) { + continue; + } + modified = true; // Copy all mipmaps and layers const u32 block_width = params.GetDefaultBlockWidth(); @@ -694,8 +700,6 @@ private: src_params.depth); ImageCopy(surface, new_surface, copy_params); } - ++passed_tests; - modified |= surface->IsModified(); } if (passed_tests == 0) { return std::nullopt; -- cgit v1.2.3 From 1ee1a5d3d64379bf2463c072a32af8e64a8c14cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 23:52:47 -0300 Subject: texture_cache: More relaxed reconstruction Only reupload textures when they've not been modified from the GPU. --- src/video_core/texture_cache/texture_cache.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3e024a098..4ba0d2c3a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -656,19 +656,19 @@ private: if (params.target == SurfaceTarget::Texture3D) { return std::nullopt; } + const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; TSurface new_surface = GetUncachedSurface(gpu_addr, params); - std::size_t passed_tests = 0; - bool modified = false; - u32 num_resources = 0; - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - num_resources += src_params.depth * src_params.num_levels; - } - if (num_resources != params.depth * params.num_levels) { + if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { LoadSurface(new_surface); + for (const auto& surface : overlaps) { + Unregister(surface); + } + Register(new_surface); + return {{new_surface, new_surface->GetMainView()}}; } + std::size_t passed_tests = 0; for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; @@ -681,11 +681,6 @@ private: } ++passed_tests; - if (!surface->IsModified()) { - continue; - } - modified = true; - // Copy all mipmaps and layers const u32 block_width = params.GetDefaultBlockWidth(); const u32 block_height = params.GetDefaultBlockHeight(); @@ -709,6 +704,7 @@ private: return std::nullopt; } + const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); for (const auto& surface : overlaps) { Unregister(surface); } -- cgit v1.2.3 From c95c254f3eda75476ad221a4828033f4140a3470 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 23:32:41 -0300 Subject: texture_cache: Implement rendering to 3D textures This allows rendering to 3D textures with more than one slice. Applications are allowed to render to more than one slice of a texture using gl_Layer from a VTG shader. This also requires reworking how 3D texture collisions are handled, for now, this commit allows rendering to slices but not to miplevels. When a render target attempts to write to a mipmap, we fallback to the previous implementation (copying or flushing as needed). - Fixes color correction 3D textures on UE4 games (rainbow effects). - Allows Xenoblade games to render to 3D textures directly. --- src/video_core/texture_cache/surface_base.cpp | 7 +- src/video_core/texture_cache/surface_base.h | 13 ++- src/video_core/texture_cache/surface_params.cpp | 17 +++- src/video_core/texture_cache/texture_cache.h | 119 ++++++++++-------------- 4 files changed, 76 insertions(+), 80 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 715f39d0d..94d3a6ae5 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -248,12 +248,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, // Use an extra temporal buffer auto& tmp_buffer = staging_cache.GetBuffer(1); - // Special case for 3D Texture Segments - const bool must_read_current_data = - params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D; tmp_buffer.resize(guest_memory_size); host_ptr = tmp_buffer.data(); - if (must_read_current_data) { + + if (params.target == SurfaceTarget::Texture3D) { + // Special case for 3D texture segments memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 79e10ffbb..173f2edba 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -217,8 +217,8 @@ public: } bool IsProtected() const { - // Only 3D Slices are to be protected - return is_target && params.block_depth > 0; + // Only 3D slices are to be protected + return is_target && params.target == SurfaceTarget::Texture3D; } bool IsRenderTarget() const { @@ -250,6 +250,11 @@ public: return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } + TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) { + return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth, + base_level, num_levels)); + } + std::optional EmplaceIrregularView(const SurfaceParams& view_params, const GPUVAddr view_addr, const std::size_t candidate_size, const u32 mipmap, @@ -272,8 +277,8 @@ public: std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, const std::size_t candidate_size) { if (params.target == SurfaceTarget::Texture3D || - (params.num_levels == 1 && !params.is_layered) || - view_params.target == SurfaceTarget::Texture3D) { + view_params.target == SurfaceTarget::Texture3D || + (params.num_levels == 1 && !params.is_layered)) { return {}; } const auto layer_mipmap{GetLayerMipmap(view_addr)}; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 884fabffe..642eeb850 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.num_levels = 1; params.emulated_levels = 1; - const bool is_layered = config.layers > 1 && params.block_depth == 0; - params.is_layered = is_layered; - params.depth = is_layered ? config.layers.Value() : 1; - params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; + if (config.memory_layout.is_3d != 0) { + params.depth = config.layers.Value(); + params.is_layered = false; + params.target = SurfaceTarget::Texture3D; + } else if (config.layers > 1) { + params.depth = config.layers.Value(); + params.is_layered = true; + params.target = SurfaceTarget::Texture2DArray; + } else { + params.depth = 1; + params.is_layered = false; + params.target = SurfaceTarget::Texture2D; + } return params; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 6f63217a2..4ee0d76b9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -508,12 +508,12 @@ private: return RecycleStrategy::Flush; } // 3D Textures decision - if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { + if (params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; } for (const auto& s : overlaps) { const auto& s_params = s->GetSurfaceParams(); - if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { + if (s_params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; } } @@ -726,76 +726,60 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. * @param cpu_addr The starting address of the new surface on physical memory. - * @param preserve_contents Indicates that the new surface should be loaded from memory or - * left blank. */ std::optional> Manage3DSurfaces(VectorSurface& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, - const VAddr cpu_addr, - bool preserve_contents) { - if (params.target == SurfaceTarget::Texture3D) { - bool failed = false; - if (params.num_levels > 1) { - // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach - return std::nullopt; - } - TSurface new_surface = GetUncachedSurface(gpu_addr, params); - bool modified = false; - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.target != SurfaceTarget::Texture2D) { - failed = true; - break; - } - if (src_params.height != params.height) { - failed = true; - break; - } - if (src_params.block_depth != params.block_depth || - src_params.block_height != params.block_height) { - failed = true; - break; + GPUVAddr gpu_addr, VAddr cpu_addr) { + if (params.num_levels > 1) { + // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach + return std::nullopt; + } + + if (overlaps.size() == 1) { + const auto& surface = overlaps[0]; + const SurfaceParams& overlap_params = surface->GetSurfaceParams(); + // Don't attempt to render to textures with more than one level for now + // The texture has to be to the right or the sample address if we want to render to it + if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { + const u32 offset = static_cast(cpu_addr - surface->GetCpuAddr()); + const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); + if (slice < overlap_params.depth) { + auto view = surface->Emplace3DView(slice, params.depth, 0, 1); + return std::make_pair(std::move(surface), std::move(view)); } - const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); - const auto offsets = params.GetBlockOffsetXYZ(offset); - const auto z = std::get<2>(offsets); - modified |= surface->IsModified(); - const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, - 1); - ImageCopy(surface, new_surface, copy_params); } - if (failed) { + } + + if (params.depth == 1) { + return std::nullopt; + } + + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + bool modified = false; + for (auto& surface : overlaps) { + const SurfaceParams& src_params = surface->GetSurfaceParams(); + if (src_params.height != params.height || + src_params.block_depth != params.block_depth || + src_params.block_height != params.block_height) { return std::nullopt; } - for (const auto& surface : overlaps) { - Unregister(surface); - } - new_surface->MarkAsModified(modified, Tick()); - Register(new_surface); - auto view = new_surface->GetMainView(); - return {{std::move(new_surface), view}}; - } else { - for (const auto& surface : overlaps) { - if (!surface->MatchTarget(params.target)) { - if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { - if (Settings::IsGPULevelExtreme()) { - return std::nullopt; - } - Unregister(surface); - return InitializeSurface(gpu_addr, params, preserve_contents); - } - return std::nullopt; - } - if (surface->GetCpuAddr() != cpu_addr) { - continue; - } - if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { - return {{surface, surface->GetMainView()}}; - } - } - return InitializeSurface(gpu_addr, params, preserve_contents); + modified |= surface->IsModified(); + + const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); + const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); + const u32 width = params.width; + const u32 height = params.height; + const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); + ImageCopy(surface, new_surface, copy_params); } + for (const auto& surface : overlaps) { + Unregister(surface); + } + new_surface->MarkAsModified(modified, Tick()); + Register(new_surface); + + auto view = new_surface->GetMainView(); + return std::make_pair(std::move(new_surface), std::move(view)); } /** @@ -873,10 +857,9 @@ private: } } - // Check if it's a 3D texture - if (params.block_depth > 0) { - auto surface = - Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); + // Manage 3D textures + if (params.target == SurfaceTarget::Texture3D) { + auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr); if (surface) { return *surface; } -- cgit v1.2.3 From 3c2ae53b4c574deb4f9afe3104c7d022c53c5281 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 1 Jun 2020 04:49:35 -0300 Subject: texture_cache: Handle 3D texture blits with one layer --- src/video_core/texture_cache/surface_params.cpp | 4 ++-- src/video_core/texture_cache/texture_cache.h | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 642eeb850..6fe7c85ac 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -246,8 +246,8 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.width = config.width; params.height = config.height; params.pitch = config.pitch; - // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters - params.target = SurfaceTarget::Texture2D; + // TODO(Rodrigo): Try to guess texture arrays from parameters + params.target = params.block_depth > 0 ? SurfaceTarget::Texture3D : SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; params.emulated_levels = 1; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4ee0d76b9..60b95a854 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -755,6 +755,8 @@ private: } TSurface new_surface = GetUncachedSurface(gpu_addr, params); + LoadSurface(new_surface); + bool modified = false; for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); @@ -763,7 +765,10 @@ private: src_params.block_height != params.block_height) { return std::nullopt; } - modified |= surface->IsModified(); + if (!surface->IsModified()) { + continue; + } + modified = true; const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); -- cgit v1.2.3 From c99f5d405b6a24603ff8174aeb2952facc4a92d9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Jun 2020 05:01:44 -0300 Subject: texture_cache: Simplify blit code --- src/video_core/texture_cache/texture_cache.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 60b95a854..b19eeed66 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -298,15 +298,13 @@ public: const GPUVAddr src_gpu_addr = src_config.Address(); const GPUVAddr dst_gpu_addr = dst_config.Address(); DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); - const std::optional dst_cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); - const std::optional src_cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); - std::pair dst_surface = - GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); - std::pair src_surface = - GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false); - ImageBlit(src_surface.second, dst_surface.second, copy_config); + + const auto& memory_manager = system.GPU().MemoryManager(); + const std::optional dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr); + const std::optional src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr); + std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); + TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; + ImageBlit(src_surface, dst_surface.second, copy_config); dst_surface.first->MarkAsModified(true, Tick()); } -- cgit v1.2.3 From bd43c0547085fcfb585ac3a90521eeb8414fd538 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Jun 2020 05:02:22 -0300 Subject: texture_cache: Port original code management for 2D vs 3D textures Handle blits to images as 2D, even when they have block depth. - Fixes rendering issues on Luigi's Mansion 3 --- src/video_core/texture_cache/surface_params.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 49 +++++++++++++++++-------- 2 files changed, 35 insertions(+), 16 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 6fe7c85ac..0b2b2b8c4 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -247,7 +247,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.height = config.height; params.pitch = config.pitch; // TODO(Rodrigo): Try to guess texture arrays from parameters - params.target = params.block_depth > 0 ? SurfaceTarget::Texture3D : SurfaceTarget::Texture2D; + params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; params.emulated_levels = 1; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b19eeed66..b543fc8c0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -724,10 +724,35 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. * @param cpu_addr The starting address of the new surface on physical memory. + * @param preserve_contents Indicates that the new surface should be loaded from memory or + * left blank. */ std::optional> Manage3DSurfaces(VectorSurface& overlaps, const SurfaceParams& params, - GPUVAddr gpu_addr, VAddr cpu_addr) { + GPUVAddr gpu_addr, VAddr cpu_addr, + bool preserve_contents) { + if (params.target != SurfaceTarget::Texture3D) { + for (const auto& surface : overlaps) { + if (!surface->MatchTarget(params.target)) { + if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { + if (Settings::IsGPULevelExtreme()) { + return std::nullopt; + } + Unregister(surface); + return InitializeSurface(gpu_addr, params, preserve_contents); + } + return std::nullopt; + } + if (surface->GetCpuAddr() != cpu_addr) { + continue; + } + if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { + return std::make_pair(surface, surface->GetMainView()); + } + } + return InitializeSurface(gpu_addr, params, preserve_contents); + } + if (params.num_levels > 1) { // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach return std::nullopt; @@ -748,25 +773,18 @@ private: } } - if (params.depth == 1) { - return std::nullopt; - } - TSurface new_surface = GetUncachedSurface(gpu_addr, params); - LoadSurface(new_surface); - bool modified = false; + for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.height != params.height || + if (src_params.target != SurfaceTarget::Texture2D || + src_params.height != params.height || src_params.block_depth != params.block_depth || src_params.block_height != params.block_height) { return std::nullopt; } - if (!surface->IsModified()) { - continue; - } - modified = true; + modified |= surface->IsModified(); const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); @@ -781,7 +799,7 @@ private: new_surface->MarkAsModified(modified, Tick()); Register(new_surface); - auto view = new_surface->GetMainView(); + TView view = new_surface->GetMainView(); return std::make_pair(std::move(new_surface), std::move(view)); } @@ -861,8 +879,9 @@ private: } // Manage 3D textures - if (params.target == SurfaceTarget::Texture3D) { - auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr); + if (params.block_depth > 0) { + auto surface = + Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); if (surface) { return *surface; } -- cgit v1.2.3 From ef53b2fd08f1122f22456500bfdc707f1c18906c Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 23:13:48 -0400 Subject: texture_cache: Fix incorrect address used in a DeduceSurface() call Previously the source was being deduced twice in a row. --- src/video_core/texture_cache/texture_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b543fc8c0..85075e868 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1053,7 +1053,7 @@ private: void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { auto deduced_src = DeduceSurface(src_gpu_addr, src_params); - auto deduced_dst = DeduceSurface(src_gpu_addr, src_params); + auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); if (deduced_src.Failed() || deduced_dst.Failed()) { return; } -- cgit v1.2.3