From d2b25575426b9b52049b88d8d6d9ae83c81da312 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 27 May 2020 17:31:12 -0300 Subject: texture_cache: Use small vector for surface vectors This avoids most heap allocations when collecting surfaces into a vector. --- src/video_core/texture_cache/texture_cache.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d6efc34b2..d7e42697d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; template class TextureCache { + using VectorSurface = boost::container::small_vector; public: void InvalidateRegion(VAddr addr, std::size_t size) { @@ -498,7 +500,7 @@ private: * @param untopological Indicates to the recycler that the texture has no way * to match the overlaps due to topological reasons. **/ - RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, + RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { if (Settings::IsGPULevelExtreme()) { return RecycleStrategy::Flush; @@ -538,9 +540,8 @@ private: * @param untopological Indicates to the recycler that the texture has no way to match the * overlaps due to topological reasons. **/ - std::pair RecycleSurface(std::vector& overlaps, - const SurfaceParams& params, const GPUVAddr gpu_addr, - const bool preserve_contents, + std::pair RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, + const GPUVAddr gpu_addr, const bool preserve_contents, const MatchTopologyResult untopological) { const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); for (auto& surface : overlaps) { @@ -650,7 +651,7 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. **/ - std::optional> TryReconstructSurface(std::vector& overlaps, + std::optional> TryReconstructSurface(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { @@ -708,7 +709,7 @@ private: * @param preserve_contents Indicates that the new surface should be loaded from memory or * left blank. */ - std::optional> Manage3DSurfaces(std::vector& overlaps, + std::optional> Manage3DSurfaces(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const VAddr cpu_addr, @@ -810,7 +811,7 @@ private: TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - std::vector overlaps{current_surface}; + VectorSurface overlaps{current_surface}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } @@ -1124,14 +1125,14 @@ private: } } - std::vector GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { + VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { if (size == 0) { return {}; } const VAddr cpu_addr_end = cpu_addr + size; VAddr start = cpu_addr >> registry_page_bits; const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; - std::vector surfaces; + VectorSurface surfaces; while (start <= end) { std::vector& list = registry[start]; for (auto& surface : list) { -- cgit v1.2.3 From b8b6f94ba9a662857c40d819ac40755c7984cb16 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 27 May 2020 17:59:04 -0300 Subject: texture_cache: Use unordered_map::find instead of operator[] on hot code --- src/video_core/texture_cache/texture_cache.h | 34 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d7e42697d..99f74e6c4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -310,18 +310,20 @@ public: dst_surface.first->MarkAsModified(true, Tick()); } - TSurface TryFindFramebufferSurface(VAddr addr) { + TSurface TryFindFramebufferSurface(VAddr addr) const { if (!addr) { return nullptr; } const VAddr page = addr >> registry_page_bits; - std::vector& list = registry[page]; - for (auto& surface : list) { - if (surface->GetCpuAddr() == addr) { - return surface; - } + const auto it = registry.find(page); + if (it == registry.end()) { + return nullptr; } - return nullptr; + const auto& list = it->second; + const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { + return surface->GetCpuAddr() == addr; + }); + return found != list.end() ? *found : nullptr; } u64 Tick() { @@ -1130,18 +1132,20 @@ private: return {}; } const VAddr cpu_addr_end = cpu_addr + size; - VAddr start = cpu_addr >> registry_page_bits; const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; VectorSurface surfaces; - while (start <= end) { - std::vector& list = registry[start]; - for (auto& surface : list) { - if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { - surface->MarkAsPicked(true); - surfaces.push_back(surface); + for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { + const auto it = registry.find(start); + if (it == registry.end()) { + continue; + } + for (auto& surface : it->second) { + if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { + continue; } + surface->MarkAsPicked(true); + surfaces.push_back(surface); } - start++; } for (auto& surface : surfaces) { surface->MarkAsPicked(false); -- cgit v1.2.3 From fc153f6bcd9884064eb2752fc7be5a6458bbd71b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 28 May 2020 17:08:44 -0300 Subject: format_lookup_table: Implement G24S8 format as S8Z24 --- src/video_core/texture_cache/format_lookup_table.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7032e0059..f476f03b0 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -41,7 +41,7 @@ struct Table { ComponentType alpha_component; bool is_srgb; }; -constexpr std::array DefinitionTable = {{ +constexpr std::array DefinitionTable = {{ {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, @@ -98,6 +98,7 @@ constexpr std::array DefinitionTable = {{ {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, + {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, -- cgit v1.2.3 From 5b37cecd76205612bfc2cc1d0b475d893fe7ee6a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 01:48:01 -0300 Subject: texture_cache: Handle overlaps with multiple subresources Implement more surface reconstruct cases. Allow overlaps with more than one layer and mipmap and copies all of them to the new texture. - Fixes textures moving around objects on Xenoblade games --- src/video_core/texture_cache/texture_cache.h | 60 +++++++++++++++------------- 1 file changed, 33 insertions(+), 27 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8bfc541d4..658264860 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -652,45 +652,54 @@ private: **/ std::optional> TryReconstructSurface(std::vector& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr) { + GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { - return {}; + return std::nullopt; } - bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); - u32 passed_tests = 0; + std::size_t passed_tests = 0; + bool modified = false; + for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.is_layered || src_params.num_levels > 1) { - // We send this cases to recycle as they are more complex to handle - return {}; - } - const std::size_t candidate_size = surface->GetSizeInBytes(); - auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; + const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { continue; } - const auto [layer, mipmap] = *mipmap_layer; - if (new_surface->GetMipmapSize(mipmap) != candidate_size) { + const auto [base_layer, base_mipmap] = *mipmap_layer; + if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { continue; } + + // Copy all mipmaps and layers + const u32 block_width = params.GetDefaultBlockWidth(); + const u32 block_height = params.GetDefaultBlockHeight(); + for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { + const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); + const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); + if (width < block_width || height < block_height) { + // Current APIs forbid copying small compressed textures, avoid errors + break; + } + const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, + src_params.depth); + ImageCopy(surface, new_surface, copy_params); + } + ++passed_tests; modified |= surface->IsModified(); - // Now we got all the data set up - const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); - const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); - const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); - passed_tests++; - ImageCopy(surface, new_surface, copy_params); } if (passed_tests == 0) { - return {}; + return std::nullopt; + } + if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { // In Accurate GPU all tests should pass, else we recycle - } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { - return {}; + return std::nullopt; } + for (const auto& surface : overlaps) { Unregister(surface); } + new_surface->MarkAsModified(modified, Tick()); Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; @@ -868,12 +877,9 @@ private: // two things either the candidate surface is a supertexture of the overlap // or they don't match in any known way. if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { - if (current_surface->GetGpuAddr() == gpu_addr) { - std::optional> view = - TryReconstructSurface(overlaps, params, gpu_addr); - if (view) { - return *view; - } + const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); + if (view) { + return *view; } return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); -- cgit v1.2.3 From dd70e097ccb84b64983456759525d650d1ceab0a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 20:10:58 -0300 Subject: texture_cache: Reload textures when number of resources mismatch --- src/video_core/texture_cache/texture_cache.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 658264860..62206b906 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -660,6 +660,15 @@ private: std::size_t passed_tests = 0; bool modified = false; + u32 num_resources = 0; + for (auto& surface : overlaps) { + const SurfaceParams& src_params = surface->GetSurfaceParams(); + num_resources += src_params.depth * src_params.num_levels; + } + if (num_resources != params.depth * params.num_levels) { + LoadSurface(new_surface); + } + for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; -- cgit v1.2.3 From e454f7e7a7d75fe0415ce48ffbd5d5979d79ce67 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 20:12:46 -0300 Subject: texture_cache: Only copy textures that were modified from host --- src/video_core/texture_cache/texture_cache.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 62206b906..3e024a098 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -679,6 +679,12 @@ private: if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { continue; } + ++passed_tests; + + if (!surface->IsModified()) { + continue; + } + modified = true; // Copy all mipmaps and layers const u32 block_width = params.GetDefaultBlockWidth(); @@ -694,8 +700,6 @@ private: src_params.depth); ImageCopy(surface, new_surface, copy_params); } - ++passed_tests; - modified |= surface->IsModified(); } if (passed_tests == 0) { return std::nullopt; -- cgit v1.2.3 From 1ee1a5d3d64379bf2463c072a32af8e64a8c14cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 23:52:47 -0300 Subject: texture_cache: More relaxed reconstruction Only reupload textures when they've not been modified from the GPU. --- src/video_core/texture_cache/texture_cache.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3e024a098..4ba0d2c3a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -656,19 +656,19 @@ private: if (params.target == SurfaceTarget::Texture3D) { return std::nullopt; } + const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; TSurface new_surface = GetUncachedSurface(gpu_addr, params); - std::size_t passed_tests = 0; - bool modified = false; - u32 num_resources = 0; - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - num_resources += src_params.depth * src_params.num_levels; - } - if (num_resources != params.depth * params.num_levels) { + if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { LoadSurface(new_surface); + for (const auto& surface : overlaps) { + Unregister(surface); + } + Register(new_surface); + return {{new_surface, new_surface->GetMainView()}}; } + std::size_t passed_tests = 0; for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; @@ -681,11 +681,6 @@ private: } ++passed_tests; - if (!surface->IsModified()) { - continue; - } - modified = true; - // Copy all mipmaps and layers const u32 block_width = params.GetDefaultBlockWidth(); const u32 block_height = params.GetDefaultBlockHeight(); @@ -709,6 +704,7 @@ private: return std::nullopt; } + const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); for (const auto& surface : overlaps) { Unregister(surface); } -- cgit v1.2.3 From c95c254f3eda75476ad221a4828033f4140a3470 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 29 May 2020 23:32:41 -0300 Subject: texture_cache: Implement rendering to 3D textures This allows rendering to 3D textures with more than one slice. Applications are allowed to render to more than one slice of a texture using gl_Layer from a VTG shader. This also requires reworking how 3D texture collisions are handled, for now, this commit allows rendering to slices but not to miplevels. When a render target attempts to write to a mipmap, we fallback to the previous implementation (copying or flushing as needed). - Fixes color correction 3D textures on UE4 games (rainbow effects). - Allows Xenoblade games to render to 3D textures directly. --- src/video_core/engines/maxwell_3d.h | 1 + .../renderer_opengl/gl_texture_cache.cpp | 49 +++++---- src/video_core/renderer_opengl/gl_texture_cache.h | 6 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 9 +- .../renderer_vulkan/vk_texture_cache.cpp | 76 ++++++++++--- src/video_core/renderer_vulkan/vk_texture_cache.h | 33 +++--- src/video_core/texture_cache/surface_base.cpp | 7 +- src/video_core/texture_cache/surface_base.h | 13 ++- src/video_core/texture_cache/surface_params.cpp | 17 ++- src/video_core/texture_cache/texture_cache.h | 119 +++++++++------------ 10 files changed, 191 insertions(+), 139 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index b827b112f..79fc9bbea 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -598,6 +598,7 @@ public: BitField<4, 3, u32> block_height; BitField<8, 3, u32> block_depth; BitField<12, 1, InvMemoryLayout> type; + BitField<16, 1, u32> is_3d; } memory_layout; union { BitField<0, 16, u32> layers; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 57db5a08b..46df15dfc 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -263,9 +263,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param target = GetTextureTarget(params.target); texture = CreateTexture(params, target, internal_format, texture_buffer); DecorateSurfaceName(); - main_view = CreateViewInner( - ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels), - true); + + u32 num_layers = 1; + if (params.is_layered || params.target == SurfaceTarget::Texture3D) { + num_layers = params.depth; + } + + main_view = + CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true); } CachedSurface::~CachedSurface() = default; @@ -413,20 +418,23 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p CachedSurfaceView::~CachedSurfaceView() = default; -void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { +void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { ASSERT(params.num_levels == 1); + if (params.target == SurfaceTarget::Texture3D) { + if (params.num_layers > 1) { + ASSERT(params.base_layer == 0); + glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); + } else { + glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, + params.base_level, params.base_layer); + } + return; + } + if (params.num_layers > 1) { - // Layered framebuffer attachments UNIMPLEMENTED_IF(params.base_layer != 0); - - switch (params.target) { - case SurfaceTarget::Texture2DArray: - glFramebufferTexture(target, attachment, GetTexture(), 0); - break; - default: - UNIMPLEMENTED(); - } + glFramebufferTexture(fb_target, attachment, GetTexture(), 0); return; } @@ -434,16 +442,16 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { const GLuint texture = surface.GetTexture(); switch (surface.GetSurfaceParams().target) { case SurfaceTarget::Texture1D: - glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); + glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); break; case SurfaceTarget::Texture2D: - glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level); + glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(target, attachment, texture, params.base_level, + glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, params.base_layer); break; default: @@ -500,8 +508,13 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { OGLTextureView texture_view; texture_view.Create(); - glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level, - params.num_levels, params.base_layer, params.num_layers); + if (target == GL_TEXTURE_3D) { + glTextureView(texture_view.handle, target, surface.texture.handle, format, + params.base_level, params.num_levels, 0, 1); + } else { + glTextureView(texture_view.handle, target, surface.texture.handle, format, + params.base_level, params.num_levels, params.base_layer, params.num_layers); + } ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); return texture_view; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 8a2ac8603..bfc4ddf5d 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -80,8 +80,10 @@ public: explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); ~CachedSurfaceView(); - /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER - void Attach(GLenum attachment, GLenum target) const; + /// @brief Attaches this texture view to the currently bound fb_target framebuffer + /// @param attachment Attachment to bind textures to + /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER) + void Attach(GLenum attachment, GLenum fb_target) const; GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d86c46412..19b8f9da3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -716,7 +716,7 @@ std::tuple RasterizerVulkan::ConfigureFramebuffers( if (!view) { return false; } - key.views.push_back(view->GetHandle()); + key.views.push_back(view->GetAttachment()); key.width = std::min(key.width, view->GetWidth()); key.height = std::min(key.height, view->GetHeight()); key.layers = std::min(key.layers, view->GetNumLayers()); @@ -1137,8 +1137,8 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu auto view = texture_cache.GetTextureSurface(texture.tic, entry); ASSERT(!view->IsBufferView()); - const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source, - texture.tic.z_source, texture.tic.w_source); + const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source, + texture.tic.z_source, texture.tic.w_source); const auto sampler = sampler_cache.GetSampler(texture.tsc); update_descriptor_queue.AddSampledImage(sampler, image_view); @@ -1164,7 +1164,8 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima UNIMPLEMENTED_IF(tic.IsBuffer()); - const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); + const VkImageView image_view = + view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); update_descriptor_queue.AddImage(image_view); const auto image_layout = update_descriptor_queue.GetLastImageLayout(); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ea487b770..430031665 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -167,6 +167,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP ci.extent = {params.width, params.height, 1}; break; case SurfaceTarget::Texture3D: + ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; ci.extent = {params.width, params.height, params.depth}; break; case SurfaceTarget::TextureBuffer: @@ -176,6 +177,12 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP return ci; } +u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) { + return (static_cast(x_source) << 24) | (static_cast(y_source) << 16) | + (static_cast(z_source) << 8) | static_cast(w_source); +} + } // Anonymous namespace CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, @@ -203,9 +210,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, } // TODO(Rodrigo): Move this to a virtual function. - main_view = CreateViewInner( - ViewParams(params.target, 0, static_cast(params.GetNumLayers()), 0, params.num_levels), - true); + u32 num_layers = 1; + if (params.is_layered || params.target == SurfaceTarget::Texture3D) { + num_layers = params.depth; + } + main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels)); } CachedSurface::~CachedSurface() = default; @@ -253,12 +262,8 @@ void CachedSurface::DecorateSurfaceName() { } View CachedSurface::CreateView(const ViewParams& params) { - return CreateViewInner(params, false); -} - -View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) { // TODO(Rodrigo): Add name decorations - return views[params] = std::make_shared(device, *this, params, is_proxy); + return views[params] = std::make_shared(device, *this, params); } void CachedSurface::UploadBuffer(const std::vector& staging_buffer) { @@ -342,18 +347,27 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { } CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, - const ViewParams& params, bool is_proxy) + const ViewParams& params) : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, - base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level}, - num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target) - : VK_IMAGE_VIEW_TYPE_1D} {} + base_level{params.base_level}, num_levels{params.num_levels}, + image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} { + if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { + base_layer = 0; + num_layers = 1; + base_slice = params.base_layer; + num_slices = params.num_layers; + } else { + base_layer = params.base_layer; + num_layers = params.num_layers; + } +} CachedSurfaceView::~CachedSurfaceView() = default; -VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, - SwizzleSource z_source, SwizzleSource w_source) { +VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source, + SwizzleSource z_source, SwizzleSource w_source) { const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); if (last_image_view && last_swizzle == new_swizzle) { return last_image_view; @@ -399,6 +413,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y }); } + if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { + ASSERT(base_slice == 0); + ASSERT(num_slices == params.depth); + } + VkImageViewCreateInfo ci; ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; ci.pNext = nullptr; @@ -417,6 +436,35 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y return last_image_view = *image_view; } +VkImageView CachedSurfaceView::GetAttachment() { + if (render_target) { + return *render_target; + } + + VkImageViewCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.image = surface.GetImageHandle(); + ci.format = surface.GetImage().GetFormat(); + ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; + ci.subresourceRange.aspectMask = aspect_mask; + ci.subresourceRange.baseMipLevel = base_level; + ci.subresourceRange.levelCount = num_levels; + if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { + ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; + ci.subresourceRange.baseArrayLayer = base_slice; + ci.subresourceRange.layerCount = num_slices; + } else { + ci.viewType = image_view_type; + ci.subresourceRange.baseArrayLayer = base_layer; + ci.subresourceRange.layerCount = num_layers; + } + render_target = device.GetLogical().CreateImageView(ci); + return *render_target; +} + VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKScheduler& scheduler, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f211ccb1e..807e26c8a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -91,7 +91,6 @@ protected: void DecorateSurfaceName(); View CreateView(const ViewParams& params) override; - View CreateViewInner(const ViewParams& params, bool is_proxy); private: void UploadBuffer(const std::vector& staging_buffer); @@ -120,23 +119,20 @@ private: class CachedSurfaceView final : public VideoCommon::ViewBase { public: explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface, - const ViewParams& params, bool is_proxy); + const ViewParams& params); ~CachedSurfaceView(); - VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); + VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source); + + VkImageView GetAttachment(); bool IsSameSurface(const CachedSurfaceView& rhs) const { return &surface == &rhs.surface; } - VkImageView GetHandle() { - return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, - Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A); - } - u32 GetWidth() const { return params.GetMipWidth(base_level); } @@ -180,14 +176,6 @@ public: } private: - static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source) { - return (static_cast(x_source) << 24) | (static_cast(y_source) << 16) | - (static_cast(z_source) << 8) | static_cast(w_source); - } - // Store a copy of these values to avoid double dereference when reading them const SurfaceParams params; const VkImage image; @@ -196,15 +184,18 @@ private: const VKDevice& device; CachedSurface& surface; - const u32 base_layer; - const u32 num_layers; const u32 base_level; const u32 num_levels; const VkImageViewType image_view_type; + u32 base_layer = 0; + u32 num_layers = 0; + u32 base_slice = 0; + u32 num_slices = 0; VkImageView last_image_view = nullptr; u32 last_swizzle = 0; + vk::ImageView render_target; std::unordered_map view_cache; }; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 715f39d0d..94d3a6ae5 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -248,12 +248,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, // Use an extra temporal buffer auto& tmp_buffer = staging_cache.GetBuffer(1); - // Special case for 3D Texture Segments - const bool must_read_current_data = - params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D; tmp_buffer.resize(guest_memory_size); host_ptr = tmp_buffer.data(); - if (must_read_current_data) { + + if (params.target == SurfaceTarget::Texture3D) { + // Special case for 3D texture segments memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 79e10ffbb..173f2edba 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -217,8 +217,8 @@ public: } bool IsProtected() const { - // Only 3D Slices are to be protected - return is_target && params.block_depth > 0; + // Only 3D slices are to be protected + return is_target && params.target == SurfaceTarget::Texture3D; } bool IsRenderTarget() const { @@ -250,6 +250,11 @@ public: return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } + TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) { + return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth, + base_level, num_levels)); + } + std::optional EmplaceIrregularView(const SurfaceParams& view_params, const GPUVAddr view_addr, const std::size_t candidate_size, const u32 mipmap, @@ -272,8 +277,8 @@ public: std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, const std::size_t candidate_size) { if (params.target == SurfaceTarget::Texture3D || - (params.num_levels == 1 && !params.is_layered) || - view_params.target == SurfaceTarget::Texture3D) { + view_params.target == SurfaceTarget::Texture3D || + (params.num_levels == 1 && !params.is_layered)) { return {}; } const auto layer_mipmap{GetLayerMipmap(view_addr)}; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 884fabffe..642eeb850 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.num_levels = 1; params.emulated_levels = 1; - const bool is_layered = config.layers > 1 && params.block_depth == 0; - params.is_layered = is_layered; - params.depth = is_layered ? config.layers.Value() : 1; - params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; + if (config.memory_layout.is_3d != 0) { + params.depth = config.layers.Value(); + params.is_layered = false; + params.target = SurfaceTarget::Texture3D; + } else if (config.layers > 1) { + params.depth = config.layers.Value(); + params.is_layered = true; + params.target = SurfaceTarget::Texture2DArray; + } else { + params.depth = 1; + params.is_layered = false; + params.target = SurfaceTarget::Texture2D; + } return params; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 6f63217a2..4ee0d76b9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -508,12 +508,12 @@ private: return RecycleStrategy::Flush; } // 3D Textures decision - if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { + if (params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; } for (const auto& s : overlaps) { const auto& s_params = s->GetSurfaceParams(); - if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { + if (s_params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; } } @@ -726,76 +726,60 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. * @param cpu_addr The starting address of the new surface on physical memory. - * @param preserve_contents Indicates that the new surface should be loaded from memory or - * left blank. */ std::optional> Manage3DSurfaces(VectorSurface& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, - const VAddr cpu_addr, - bool preserve_contents) { - if (params.target == SurfaceTarget::Texture3D) { - bool failed = false; - if (params.num_levels > 1) { - // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach - return std::nullopt; - } - TSurface new_surface = GetUncachedSurface(gpu_addr, params); - bool modified = false; - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.target != SurfaceTarget::Texture2D) { - failed = true; - break; - } - if (src_params.height != params.height) { - failed = true; - break; - } - if (src_params.block_depth != params.block_depth || - src_params.block_height != params.block_height) { - failed = true; - break; + GPUVAddr gpu_addr, VAddr cpu_addr) { + if (params.num_levels > 1) { + // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach + return std::nullopt; + } + + if (overlaps.size() == 1) { + const auto& surface = overlaps[0]; + const SurfaceParams& overlap_params = surface->GetSurfaceParams(); + // Don't attempt to render to textures with more than one level for now + // The texture has to be to the right or the sample address if we want to render to it + if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { + const u32 offset = static_cast(cpu_addr - surface->GetCpuAddr()); + const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); + if (slice < overlap_params.depth) { + auto view = surface->Emplace3DView(slice, params.depth, 0, 1); + return std::make_pair(std::move(surface), std::move(view)); } - const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); - const auto offsets = params.GetBlockOffsetXYZ(offset); - const auto z = std::get<2>(offsets); - modified |= surface->IsModified(); - const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, - 1); - ImageCopy(surface, new_surface, copy_params); } - if (failed) { + } + + if (params.depth == 1) { + return std::nullopt; + } + + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + bool modified = false; + for (auto& surface : overlaps) { + const SurfaceParams& src_params = surface->GetSurfaceParams(); + if (src_params.height != params.height || + src_params.block_depth != params.block_depth || + src_params.block_height != params.block_height) { return std::nullopt; } - for (const auto& surface : overlaps) { - Unregister(surface); - } - new_surface->MarkAsModified(modified, Tick()); - Register(new_surface); - auto view = new_surface->GetMainView(); - return {{std::move(new_surface), view}}; - } else { - for (const auto& surface : overlaps) { - if (!surface->MatchTarget(params.target)) { - if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { - if (Settings::IsGPULevelExtreme()) { - return std::nullopt; - } - Unregister(surface); - return InitializeSurface(gpu_addr, params, preserve_contents); - } - return std::nullopt; - } - if (surface->GetCpuAddr() != cpu_addr) { - continue; - } - if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { - return {{surface, surface->GetMainView()}}; - } - } - return InitializeSurface(gpu_addr, params, preserve_contents); + modified |= surface->IsModified(); + + const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); + const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); + const u32 width = params.width; + const u32 height = params.height; + const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); + ImageCopy(surface, new_surface, copy_params); } + for (const auto& surface : overlaps) { + Unregister(surface); + } + new_surface->MarkAsModified(modified, Tick()); + Register(new_surface); + + auto view = new_surface->GetMainView(); + return std::make_pair(std::move(new_surface), std::move(view)); } /** @@ -873,10 +857,9 @@ private: } } - // Check if it's a 3D texture - if (params.block_depth > 0) { - auto surface = - Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); + // Manage 3D textures + if (params.target == SurfaceTarget::Texture3D) { + auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr); if (surface) { return *surface; } -- cgit v1.2.3 From 3c2ae53b4c574deb4f9afe3104c7d022c53c5281 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 1 Jun 2020 04:49:35 -0300 Subject: texture_cache: Handle 3D texture blits with one layer --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 4 ++-- src/video_core/texture_cache/surface_params.cpp | 4 ++-- src/video_core/texture_cache/texture_cache.h | 7 ++++++- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 46df15dfc..61505879b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -557,8 +557,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) { const auto& src_params{src_view->GetSurfaceParams()}; const auto& dst_params{dst_view->GetSurfaceParams()}; - UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); - UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); + UNIMPLEMENTED_IF(src_params.depth != 1); + UNIMPLEMENTED_IF(dst_params.depth != 1); state_tracker.NotifyScissor0(); state_tracker.NotifyFramebuffer(); diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 642eeb850..6fe7c85ac 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -246,8 +246,8 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.width = config.width; params.height = config.height; params.pitch = config.pitch; - // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters - params.target = SurfaceTarget::Texture2D; + // TODO(Rodrigo): Try to guess texture arrays from parameters + params.target = params.block_depth > 0 ? SurfaceTarget::Texture3D : SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; params.emulated_levels = 1; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4ee0d76b9..60b95a854 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -755,6 +755,8 @@ private: } TSurface new_surface = GetUncachedSurface(gpu_addr, params); + LoadSurface(new_surface); + bool modified = false; for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); @@ -763,7 +765,10 @@ private: src_params.block_height != params.block_height) { return std::nullopt; } - modified |= surface->IsModified(); + if (!surface->IsModified()) { + continue; + } + modified = true; const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); -- cgit v1.2.3 From c99f5d405b6a24603ff8174aeb2952facc4a92d9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Jun 2020 05:01:44 -0300 Subject: texture_cache: Simplify blit code --- src/video_core/texture_cache/texture_cache.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 60b95a854..b19eeed66 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -298,15 +298,13 @@ public: const GPUVAddr src_gpu_addr = src_config.Address(); const GPUVAddr dst_gpu_addr = dst_config.Address(); DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); - const std::optional dst_cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); - const std::optional src_cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); - std::pair dst_surface = - GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); - std::pair src_surface = - GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false); - ImageBlit(src_surface.second, dst_surface.second, copy_config); + + const auto& memory_manager = system.GPU().MemoryManager(); + const std::optional dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr); + const std::optional src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr); + std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); + TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; + ImageBlit(src_surface, dst_surface.second, copy_config); dst_surface.first->MarkAsModified(true, Tick()); } -- cgit v1.2.3 From bd43c0547085fcfb585ac3a90521eeb8414fd538 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Jun 2020 05:02:22 -0300 Subject: texture_cache: Port original code management for 2D vs 3D textures Handle blits to images as 2D, even when they have block depth. - Fixes rendering issues on Luigi's Mansion 3 --- src/video_core/texture_cache/surface_params.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 49 +++++++++++++++++-------- 2 files changed, 35 insertions(+), 16 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 6fe7c85ac..0b2b2b8c4 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -247,7 +247,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.height = config.height; params.pitch = config.pitch; // TODO(Rodrigo): Try to guess texture arrays from parameters - params.target = params.block_depth > 0 ? SurfaceTarget::Texture3D : SurfaceTarget::Texture2D; + params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; params.emulated_levels = 1; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b19eeed66..b543fc8c0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -724,10 +724,35 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. * @param cpu_addr The starting address of the new surface on physical memory. + * @param preserve_contents Indicates that the new surface should be loaded from memory or + * left blank. */ std::optional> Manage3DSurfaces(VectorSurface& overlaps, const SurfaceParams& params, - GPUVAddr gpu_addr, VAddr cpu_addr) { + GPUVAddr gpu_addr, VAddr cpu_addr, + bool preserve_contents) { + if (params.target != SurfaceTarget::Texture3D) { + for (const auto& surface : overlaps) { + if (!surface->MatchTarget(params.target)) { + if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { + if (Settings::IsGPULevelExtreme()) { + return std::nullopt; + } + Unregister(surface); + return InitializeSurface(gpu_addr, params, preserve_contents); + } + return std::nullopt; + } + if (surface->GetCpuAddr() != cpu_addr) { + continue; + } + if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { + return std::make_pair(surface, surface->GetMainView()); + } + } + return InitializeSurface(gpu_addr, params, preserve_contents); + } + if (params.num_levels > 1) { // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach return std::nullopt; @@ -748,25 +773,18 @@ private: } } - if (params.depth == 1) { - return std::nullopt; - } - TSurface new_surface = GetUncachedSurface(gpu_addr, params); - LoadSurface(new_surface); - bool modified = false; + for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.height != params.height || + if (src_params.target != SurfaceTarget::Texture2D || + src_params.height != params.height || src_params.block_depth != params.block_depth || src_params.block_height != params.block_height) { return std::nullopt; } - if (!surface->IsModified()) { - continue; - } - modified = true; + modified |= surface->IsModified(); const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); @@ -781,7 +799,7 @@ private: new_surface->MarkAsModified(modified, Tick()); Register(new_surface); - auto view = new_surface->GetMainView(); + TView view = new_surface->GetMainView(); return std::make_pair(std::move(new_surface), std::move(view)); } @@ -861,8 +879,9 @@ private: } // Manage 3D textures - if (params.target == SurfaceTarget::Texture3D) { - auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr); + if (params.block_depth > 0) { + auto surface = + Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); if (surface) { return *surface; } -- cgit v1.2.3 From ef53b2fd08f1122f22456500bfdc707f1c18906c Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 23:13:48 -0400 Subject: texture_cache: Fix incorrect address used in a DeduceSurface() call Previously the source was being deduced twice in a row. --- src/video_core/texture_cache/texture_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b543fc8c0..85075e868 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1053,7 +1053,7 @@ private: void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { auto deduced_src = DeduceSurface(src_gpu_addr, src_params); - auto deduced_dst = DeduceSurface(src_gpu_addr, src_params); + auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); if (deduced_src.Failed() || deduced_dst.Failed()) { return; } -- cgit v1.2.3