From 1b4503c571d3b961efe74fa7e35d5fa14941ec09 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Apr 2019 16:35:54 -0300 Subject: texture_cache: Split texture cache into different files --- src/video_core/texture_cache/surface_base.cpp | 118 ++++++ src/video_core/texture_cache/surface_base.h | 172 +++++++++ src/video_core/texture_cache/surface_params.cpp | 412 +++++++++++++++++++++ src/video_core/texture_cache/surface_params.h | 229 ++++++++++++ src/video_core/texture_cache/surface_view.cpp | 23 ++ src/video_core/texture_cache/surface_view.h | 35 ++ src/video_core/texture_cache/texture_cache.h | 282 ++++++++++++++ .../texture_cache/texture_cache_contextless.h | 93 +++++ 8 files changed, 1364 insertions(+) create mode 100644 src/video_core/texture_cache/surface_base.cpp create mode 100644 src/video_core/texture_cache/surface_base.h create mode 100644 src/video_core/texture_cache/surface_params.cpp create mode 100644 src/video_core/texture_cache/surface_params.h create mode 100644 src/video_core/texture_cache/surface_view.cpp create mode 100644 src/video_core/texture_cache/surface_view.h create mode 100644 src/video_core/texture_cache/texture_cache.h create mode 100644 src/video_core/texture_cache/texture_cache_contextless.h (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp new file mode 100644 index 000000000..8680485b4 --- /dev/null +++ b/src/video_core/texture_cache/surface_base.cpp @@ -0,0 +1,118 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/morton.h" +#include "video_core/texture_cache/surface_base.h" +#include "video_core/texture_cache/surface_params.h" +#include "video_core/textures/convert.h" + +namespace VideoCommon { + +using Tegra::Texture::ConvertFromGuestToHost; +using VideoCore::MortonSwizzleMode; + +namespace { +void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, + u32 level) { + const u32 width{params.GetMipWidth(level)}; + const u32 height{params.GetMipHeight(level)}; + const u32 block_height{params.GetMipBlockHeight(level)}; + const u32 block_depth{params.GetMipBlockDepth(level)}; + + std::size_t guest_offset{params.GetGuestMipmapLevelOffset(level)}; + if (params.IsLayered()) { + std::size_t host_offset{0}; + const std::size_t guest_stride = params.GetGuestLayerSize(); + const std::size_t host_stride = params.GetHostLayerSize(level); + for (u32 layer = 0; layer < params.GetNumLayers(); layer++) { + MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, + 1, params.GetTileWidthSpacing(), buffer + host_offset, + memory + guest_offset); + guest_offset += guest_stride; + host_offset += host_stride; + } + } else { + MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, + params.GetMipDepth(level), params.GetTileWidthSpacing(), buffer, + memory + guest_offset); + } +} +} // Anonymous namespace + +SurfaceBaseImpl::SurfaceBaseImpl(const SurfaceParams& params) : params{params} { + staging_buffer.resize(params.GetHostSizeInBytes()); +} + +SurfaceBaseImpl::~SurfaceBaseImpl() = default; + +void SurfaceBaseImpl::LoadBuffer() { + if (params.IsTiled()) { + ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}", + params.GetBlockWidth(), static_cast(params.GetTarget())); + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + u8* const 
buffer{GetStagingBufferLevelData(level)}; + SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, buffer, level); + } + } else { + ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented"); + const u32 bpp{GetFormatBpp(params.GetPixelFormat()) / CHAR_BIT}; + const u32 block_width{params.GetDefaultBlockWidth()}; + const u32 block_height{params.GetDefaultBlockHeight()}; + const u32 width{(params.GetWidth() + block_width - 1) / block_width}; + const u32 height{(params.GetHeight() + block_height - 1) / block_height}; + const u32 copy_size{width * bpp}; + if (params.GetPitch() == copy_size) { + std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); + } else { + const u8* start{host_ptr}; + u8* write_to{staging_buffer.data()}; + for (u32 h = height; h > 0; --h) { + std::memcpy(write_to, start, copy_size); + start += params.GetPitch(); + write_to += copy_size; + } + } + } + + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + ConvertFromGuestToHost(GetStagingBufferLevelData(level), params.GetPixelFormat(), + params.GetMipWidth(level), params.GetMipHeight(level), + params.GetMipDepth(level), true, true); + } +} + +void SurfaceBaseImpl::FlushBuffer() { + if (params.IsTiled()) { + ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}", + params.GetBlockWidth()); + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + u8* const buffer = GetStagingBufferLevelData(level); + SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, buffer, level); + } + } else { + UNIMPLEMENTED(); + /* + ASSERT(params.GetTarget() == SurfaceTarget::Texture2D); + ASSERT(params.GetNumLevels() == 1); + + const u32 bpp{params.GetFormatBpp() / 8}; + const u32 copy_size{params.GetWidth() * bpp}; + if (params.GetPitch() == copy_size) { + std::memcpy(host_ptr, staging_buffer.data(), GetSizeInBytes()); + } else { + u8* start{host_ptr}; + const u8* read_to{staging_buffer.data()}; + for (u32 h = 
params.GetHeight(); h > 0; --h) { + std::memcpy(start, read_to, copy_size); + start += params.GetPitch(); + read_to += copy_size; + } + } + */ + } +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h new file mode 100644 index 000000000..d0142a9e6 --- /dev/null +++ b/src/video_core/texture_cache/surface_base.h @@ -0,0 +1,172 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/gpu.h" +#include "video_core/texture_cache/surface_params.h" +#include "video_core/texture_cache/surface_view.h" + +namespace VideoCommon { + +class SurfaceBaseImpl { +public: + void LoadBuffer(); + + void FlushBuffer(); + + GPUVAddr GetGpuAddr() const { + ASSERT(is_registered); + return gpu_addr; + } + + VAddr GetCpuAddr() const { + ASSERT(is_registered); + return cpu_addr; + } + + u8* GetHostPtr() const { + ASSERT(is_registered); + return host_ptr; + } + + CacheAddr GetCacheAddr() const { + ASSERT(is_registered); + return cache_addr; + } + + const SurfaceParams& GetSurfaceParams() const { + return params; + } + + void Register(GPUVAddr gpu_addr_, VAddr cpu_addr_, u8* host_ptr_) { + ASSERT(!is_registered); + is_registered = true; + gpu_addr = gpu_addr_; + cpu_addr = cpu_addr_; + host_ptr = host_ptr_; + cache_addr = ToCacheAddr(host_ptr_); + DecorateSurfaceName(); + } + + void Unregister() { + ASSERT(is_registered); + is_registered = false; + } + + bool IsRegistered() const { + return is_registered; + } + + std::size_t GetSizeInBytes() const { + return params.GetGuestSizeInBytes(); + } + + u8* GetStagingBufferLevelData(u32 level) { + return staging_buffer.data() + params.GetHostMipmapLevelOffset(level); + } + +protected: + explicit SurfaceBaseImpl(const SurfaceParams& params); + ~SurfaceBaseImpl(); // non-virtual is 
intended + + virtual void DecorateSurfaceName() = 0; + + const SurfaceParams params; + +private: + GPUVAddr gpu_addr{}; + VAddr cpu_addr{}; + u8* host_ptr{}; + CacheAddr cache_addr{}; + bool is_registered{}; + + std::vector staging_buffer; +}; + +template +class SurfaceBase : public SurfaceBaseImpl { + static_assert(std::is_trivially_copyable_v); + +public: + virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0; + + virtual TExecutionContext DownloadTexture(TExecutionContext exctx) = 0; + + TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) { + if (view_addr < GetGpuAddr() || !params.IsFamiliar(view_params)) { + // It can't be a view if it's in a prior address. + return {}; + } + + const auto relative_offset{static_cast(view_addr - GetGpuAddr())}; + const auto it{view_offset_map.find(relative_offset)}; + if (it == view_offset_map.end()) { + // Couldn't find an aligned view. + return {}; + } + const auto [layer, level] = it->second; + + if (!params.IsViewValid(view_params, layer, level)) { + return {}; + } + + return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels()); + } + + void MarkAsModified(bool is_modified_) { + is_modified = is_modified_; + if (is_modified_) { + modification_tick = texture_cache.Tick(); + } + } + + TView* GetView(GPUVAddr view_addr, const SurfaceParams& view_params) { + TView* view{TryGetView(view_addr, view_params)}; + ASSERT(view != nullptr); + return view; + } + + bool IsModified() const { + return is_modified; + } + + u64 GetModificationTick() const { + return modification_tick; + } + +protected: + explicit SurfaceBase(TTextureCache& texture_cache, const SurfaceParams& params) + : SurfaceBaseImpl{params}, texture_cache{texture_cache}, + view_offset_map{params.CreateViewOffsetMap()} {} + + ~SurfaceBase() = default; + + virtual std::unique_ptr CreateView(const ViewKey& view_key) = 0; + +private: + TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) { 
+ const ViewKey key{base_layer, num_layers, base_level, num_levels}; + const auto [entry, is_cache_miss] = views.try_emplace(key); + auto& view{entry->second}; + if (is_cache_miss) { + view = CreateView(key); + } + return view.get(); + } + + TTextureCache& texture_cache; + const std::map> view_offset_map; + + std::unordered_map> views; + + bool is_modified{}; + u64 modification_tick{}; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp new file mode 100644 index 000000000..d1f8c53d5 --- /dev/null +++ b/src/video_core/texture_cache/surface_params.cpp @@ -0,0 +1,412 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "common/alignment.h" +#include "core/core.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/surface_params.h" +#include "video_core/textures/decoders.h" + +namespace VideoCommon { + +using VideoCore::Surface::ComponentTypeFromDepthFormat; +using VideoCore::Surface::ComponentTypeFromRenderTarget; +using VideoCore::Surface::ComponentTypeFromTexture; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::PixelFormatFromTextureFormat; +using VideoCore::Surface::SurfaceTarget; +using VideoCore::Surface::SurfaceTargetFromTextureType; + +namespace { +constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { + return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); +} +} // Anonymous namespace + +SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, + const Tegra::Texture::FullTextureInfo& config) { + SurfaceParams params; + params.is_tiled = config.tic.IsTiled(); + params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); + params.block_width = params.is_tiled ? 
config.tic.BlockWidth() : 0, + params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, + params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, + params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; + params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), + params.srgb_conversion); + params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); + params.type = GetFormatType(params.pixel_format); + params.target = SurfaceTargetFromTextureType(config.tic.texture_type); + params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); + params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); + params.depth = config.tic.Depth(); + if (params.target == SurfaceTarget::TextureCubemap || + params.target == SurfaceTarget::TextureCubeArray) { + params.depth *= 6; + } + params.pitch = params.is_tiled ? 
0 : config.tic.Pitch(); + params.unaligned_height = config.tic.Height(); + params.num_levels = config.tic.max_mip_level + 1; + + params.CalculateCachedValues(); + return params; +} + +SurfaceParams SurfaceParams::CreateForDepthBuffer( + Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, + u32 block_width, u32 block_height, u32 block_depth, + Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { + SurfaceParams params; + params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; + params.srgb_conversion = false; + params.block_width = 1 << std::min(block_width, 5U); + params.block_height = 1 << std::min(block_height, 5U); + params.block_depth = 1 << std::min(block_depth, 5U); + params.tile_width_spacing = 1; + params.pixel_format = PixelFormatFromDepthFormat(format); + params.component_type = ComponentTypeFromDepthFormat(format); + params.type = GetFormatType(params.pixel_format); + params.width = zeta_width; + params.height = zeta_height; + params.unaligned_height = zeta_height; + params.target = SurfaceTarget::Texture2D; + params.depth = 1; + params.num_levels = 1; + + params.CalculateCachedValues(); + return params; +} + +SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) { + const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; + SurfaceParams params; + params.is_tiled = + config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || + config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; + params.block_width = 1 << config.memory_layout.block_width; + params.block_height = 1 << config.memory_layout.block_height; + params.block_depth = 1 << config.memory_layout.block_depth; + params.tile_width_spacing = 1; + params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); + params.component_type = 
ComponentTypeFromRenderTarget(config.format); + params.type = GetFormatType(params.pixel_format); + if (params.is_tiled) { + params.width = config.width; + } else { + const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; + params.pitch = config.width; + params.width = params.pitch / bpp; + } + params.height = config.height; + params.depth = 1; + params.unaligned_height = config.height; + params.target = SurfaceTarget::Texture2D; + params.num_levels = 1; + + params.CalculateCachedValues(); + return params; +} + +SurfaceParams SurfaceParams::CreateForFermiCopySurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params{}; + params.is_tiled = !config.linear; + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || + config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; + params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, + params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, + params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, + params.tile_width_spacing = 1; + params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); + params.component_type = ComponentTypeFromRenderTarget(config.format); + params.type = GetFormatType(params.pixel_format); + params.width = config.width; + params.height = config.height; + params.unaligned_height = config.height; + // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters + params.target = SurfaceTarget::Texture2D; + params.depth = 1; + params.num_levels = 1; + + params.CalculateCachedValues(); + return params; +} + +u32 SurfaceParams::GetMipWidth(u32 level) const { + return std::max(1U, width >> level); +} + +u32 SurfaceParams::GetMipHeight(u32 level) const { + return std::max(1U, height >> level); +} + +u32 SurfaceParams::GetMipDepth(u32 level) const { + return IsLayered() ? 
depth : std::max(1U, depth >> level); +} + +bool SurfaceParams::IsLayered() const { + switch (target) { + case SurfaceTarget::Texture1DArray: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubeArray: + return true; + default: + return false; + } +} + +u32 SurfaceParams::GetMipBlockHeight(u32 level) const { + // Auto block resizing algorithm from: + // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c + if (level == 0) { + return this->block_height; + } + + const u32 height{GetMipHeight(level)}; + const u32 default_block_height{GetDefaultBlockHeight()}; + const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; + u32 block_height = 16; + while (block_height > 1 && blocks_in_y <= block_height * 4) { + block_height >>= 1; + } + return block_height; +} + +u32 SurfaceParams::GetMipBlockDepth(u32 level) const { + if (level == 0) { + return this->block_depth; + } + if (IsLayered()) { + return 1; + } + + const u32 depth{GetMipDepth(level)}; + u32 block_depth = 32; + while (block_depth > 1 && depth * 2 <= block_depth) { + block_depth >>= 1; + } + + if (block_depth == 32 && GetMipBlockHeight(level) >= 4) { + return 16; + } + + return block_depth; +} + +std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { + std::size_t offset = 0; + for (u32 i = 0; i < level; i++) { + offset += GetInnerMipmapMemorySize(i, false, false); + } + return offset; +} + +std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { + std::size_t offset = 0; + for (u32 i = 0; i < level; i++) { + offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); + } + return offset; +} + +std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { + return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); +} + +std::size_t SurfaceParams::GetGuestLayerSize() const { + return GetLayerSize(false, false); +} + +std::size_t 
SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { + std::size_t size = 0; + for (u32 level = 0; level < num_levels; ++level) { + size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); + } + if (is_tiled && (IsLayered() || target == SurfaceTarget::Texture3D)) { + return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + } + return size; +} + +std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { + ASSERT(target != SurfaceTarget::Texture3D); + return GetInnerMipmapMemorySize(level, true, false); +} + +u32 SurfaceParams::GetDefaultBlockWidth() const { + return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); +} + +u32 SurfaceParams::GetDefaultBlockHeight() const { + return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); +} + +u32 SurfaceParams::GetBitsPerPixel() const { + return VideoCore::Surface::GetFormatBpp(pixel_format); +} + +u32 SurfaceParams::GetBytesPerPixel() const { + return VideoCore::Surface::GetBytesPerPixel(pixel_format); +} + +bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const { + if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) != + std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format, + view_params.component_type, view_params.type)) { + return false; + } + + const SurfaceTarget view_target{view_params.target}; + if (view_target == target) { + return true; + } + + switch (target) { + case SurfaceTarget::Texture1D: + case SurfaceTarget::Texture2D: + case SurfaceTarget::Texture3D: + return false; + case SurfaceTarget::Texture1DArray: + return view_target == SurfaceTarget::Texture1D; + case SurfaceTarget::Texture2DArray: + return view_target == SurfaceTarget::Texture2D; + case SurfaceTarget::TextureCubemap: + return view_target == SurfaceTarget::Texture2D || + view_target == SurfaceTarget::Texture2DArray; + case SurfaceTarget::TextureCubeArray: + return view_target == 
SurfaceTarget::Texture2D || + view_target == SurfaceTarget::Texture2DArray || + view_target == SurfaceTarget::TextureCubemap; + default: + UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast(target)); + return false; + } +} + +bool SurfaceParams::IsPixelFormatZeta() const { + return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && + pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; +} + +void SurfaceParams::CalculateCachedValues() { + switch (target) { + case SurfaceTarget::Texture1D: + case SurfaceTarget::Texture2D: + case SurfaceTarget::Texture3D: + num_layers = 1; + break; + case SurfaceTarget::Texture1DArray: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubeArray: + num_layers = depth; + break; + default: + UNREACHABLE(); + } + + guest_size_in_bytes = GetInnerMemorySize(false, false, false); + + if (IsPixelFormatASTC(pixel_format)) { + // ASTC is uncompressed in software, in emulated as RGBA8 + host_size_in_bytes = static_cast(width) * static_cast(height) * + static_cast(depth) * 4ULL; + } else { + host_size_in_bytes = GetInnerMemorySize(true, false, false); + } +} + +std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, + bool uncompressed) const { + const bool tiled{as_host_size ? false : is_tiled}; + const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; + const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; + const u32 depth{target == SurfaceTarget::Texture3D ? GetMipDepth(level) : 1U}; + return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, + GetMipBlockHeight(level), GetMipBlockDepth(level)); +} + +std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, + bool uncompressed) const { + return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 
1U : num_layers); +} + +std::map> SurfaceParams::CreateViewOffsetMap() const { + std::map> view_offset_map; + switch (target) { + case SurfaceTarget::Texture1D: + case SurfaceTarget::Texture2D: + case SurfaceTarget::Texture3D: { + // TODO(Rodrigo): Add layer iterations for 3D textures + constexpr u32 layer = 0; + for (u32 level = 0; level < num_levels; ++level) { + const std::size_t offset{GetGuestMipmapLevelOffset(level)}; + view_offset_map.insert({offset, {layer, level}}); + } + break; + } + case SurfaceTarget::Texture1DArray: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubeArray: { + const std::size_t layer_size{GetGuestLayerSize()}; + for (u32 level = 0; level < num_levels; ++level) { + const std::size_t level_offset{GetGuestMipmapLevelOffset(level)}; + for (u32 layer = 0; layer < num_layers; ++layer) { + const auto layer_offset{static_cast(layer_size * layer)}; + const std::size_t offset{level_offset + layer_offset}; + view_offset_map.insert({offset, {layer, level}}); + } + } + break; + } + default: + UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast(target)); + } + return view_offset_map; +} + +bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const { + return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) && + IsInBounds(view_params, layer, level); +} + +bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const { + return view_params.width == GetMipWidth(level) && view_params.height == GetMipHeight(level); +} + +bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const { + if (view_params.target != SurfaceTarget::Texture3D) { + return true; + } + return view_params.depth == GetMipDepth(level); +} + +bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const { + return layer + view_params.num_layers <= num_layers && + level + 
view_params.num_levels <= num_levels; +} + +std::size_t HasheableSurfaceParams::Hash() const { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof(*this))); +} + +bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const { + return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, + height, depth, pitch, unaligned_height, num_levels, pixel_format, + component_type, type, target) == + std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, + rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, + rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type, + rhs.type, rhs.target); +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h new file mode 100644 index 000000000..77dc0ba66 --- /dev/null +++ b/src/video_core/texture_cache/surface_params.h @@ -0,0 +1,229 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" + +namespace VideoCommon { + +class HasheableSurfaceParams { +public: + std::size_t Hash() const; + + bool operator==(const HasheableSurfaceParams& rhs) const; + + bool operator!=(const HasheableSurfaceParams& rhs) const { + return !operator==(rhs); + } + +protected: + // Avoid creation outside of a managed environment. 
+ HasheableSurfaceParams() = default; + + bool is_tiled; + bool srgb_conversion; + u32 block_width; + u32 block_height; + u32 block_depth; + u32 tile_width_spacing; + u32 width; + u32 height; + u32 depth; + u32 pitch; + u32 unaligned_height; + u32 num_levels; + VideoCore::Surface::PixelFormat pixel_format; + VideoCore::Surface::ComponentType component_type; + VideoCore::Surface::SurfaceType type; + VideoCore::Surface::SurfaceTarget target; +}; + +class SurfaceParams final : public HasheableSurfaceParams { +public: + /// Creates SurfaceCachedParams from a texture configuration. + static SurfaceParams CreateForTexture(Core::System& system, + const Tegra::Texture::FullTextureInfo& config); + + /// Creates SurfaceCachedParams for a depth buffer configuration. + static SurfaceParams CreateForDepthBuffer( + Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, + u32 block_width, u32 block_height, u32 block_depth, + Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); + + /// Creates SurfaceCachedParams from a framebuffer configuration. + static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); + + /// Creates SurfaceCachedParams from a Fermi2D surface configuration. 
+ static SurfaceParams CreateForFermiCopySurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config); + + bool IsTiled() const { + return is_tiled; + } + + bool GetSrgbConversion() const { + return srgb_conversion; + } + + u32 GetBlockWidth() const { + return block_width; + } + + u32 GetTileWidthSpacing() const { + return tile_width_spacing; + } + + u32 GetWidth() const { + return width; + } + + u32 GetHeight() const { + return height; + } + + u32 GetDepth() const { + return depth; + } + + u32 GetPitch() const { + return pitch; + } + + u32 GetNumLevels() const { + return num_levels; + } + + VideoCore::Surface::PixelFormat GetPixelFormat() const { + return pixel_format; + } + + VideoCore::Surface::ComponentType GetComponentType() const { + return component_type; + } + + VideoCore::Surface::SurfaceTarget GetTarget() const { + return target; + } + + VideoCore::Surface::SurfaceType GetType() const { + return type; + } + + std::size_t GetGuestSizeInBytes() const { + return guest_size_in_bytes; + } + + std::size_t GetHostSizeInBytes() const { + return host_size_in_bytes; + } + + u32 GetNumLayers() const { + return num_layers; + } + + /// Returns the width of a given mipmap level. + u32 GetMipWidth(u32 level) const; + + /// Returns the height of a given mipmap level. + u32 GetMipHeight(u32 level) const; + + /// Returns the depth of a given mipmap level. + u32 GetMipDepth(u32 level) const; + + /// Returns true if these parameters are from a layered surface. + bool IsLayered() const; + + /// Returns the block height of a given mipmap level. + u32 GetMipBlockHeight(u32 level) const; + + /// Returns the block depth of a given mipmap level. + u32 GetMipBlockDepth(u32 level) const; + + /// Returns the offset in bytes in guest memory of a given mipmap level. + std::size_t GetGuestMipmapLevelOffset(u32 level) const; + + /// Returns the offset in bytes in host memory (linear) of a given mipmap level. 
+ std::size_t GetHostMipmapLevelOffset(u32 level) const; + + /// Returns the size in bytes in host memory (linear) of a given mipmap level. + std::size_t GetHostMipmapSize(u32 level) const; + + /// Returns the size of a layer in bytes in guest memory. + std::size_t GetGuestLayerSize() const; + + /// Returns the size of a layer in bytes in host memory for a given mipmap level. + std::size_t GetHostLayerSize(u32 level) const; + + /// Returns the default block width. + u32 GetDefaultBlockWidth() const; + + /// Returns the default block height. + u32 GetDefaultBlockHeight() const; + + /// Returns the bits per pixel. + u32 GetBitsPerPixel() const; + + /// Returns the bytes per pixel. + u32 GetBytesPerPixel() const; + + /// Returns true if another surface can be familiar with this. This is a loosely defined term + /// that reflects the possibility of these two surface parameters potentially being part of a + /// bigger superset. + bool IsFamiliar(const SurfaceParams& view_params) const; + + /// Returns true if the pixel format is a depth and/or stencil format. + bool IsPixelFormatZeta() const; + + /// Creates a map that redirects an address difference to a layer and mipmap level. + std::map> CreateViewOffsetMap() const; + + /// Returns true if the passed surface view parameters is equal or a valid subset of this. + bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const; + +private: + /// Calculates values that can be deduced from HasheableSurfaceParams. + void CalculateCachedValues(); + + /// Returns the size of a given mipmap level inside a layer. + std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; + + /// Returns the size of all mipmap levels and aligns as needed. 
+ std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const; + + /// Returns the size of a layer + std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; + + /// Returns true if the passed view width and height match the size of this params in a given + /// mipmap level. + bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const; + + /// Returns true if the passed view depth match the size of this params in a given mipmap level. + bool IsDepthValid(const SurfaceParams& view_params, u32 level) const; + + /// Returns true if the passed view layers and mipmap levels are in bounds. + bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const; + + std::size_t guest_size_in_bytes; + std::size_t host_size_in_bytes; + u32 num_layers; +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash { + std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp new file mode 100644 index 000000000..5f4cdbb1c --- /dev/null +++ b/src/video_core/texture_cache/surface_view.cpp @@ -0,0 +1,23 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/common_types.h" +#include "video_core/texture_cache/surface_view.h" + +namespace VideoCommon { + +std::size_t ViewKey::Hash() const { + return static_cast(base_layer) ^ static_cast(num_layers << 16) ^ + (static_cast(base_level) << 32) ^ + (static_cast(num_levels) << 48); +} + +bool ViewKey::operator==(const ViewKey& rhs) const { + return std::tie(base_layer, num_layers, base_level, num_levels) == + std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels); +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h new file mode 100644 index 000000000..e73d8f6ae --- /dev/null +++ b/src/video_core/texture_cache/surface_view.h @@ -0,0 +1,35 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" + +namespace VideoCommon { + +struct ViewKey { + std::size_t Hash() const; + + bool operator==(const ViewKey& rhs) const; + + u32 base_layer{}; + u32 num_layers{}; + u32 base_level{}; + u32 num_levels{}; +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash { + std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h new file mode 100644 index 000000000..fb43fa65e --- /dev/null +++ b/src/video_core/texture_cache/texture_cache.h @@ -0,0 +1,282 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "core/memory.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/surface_base.h" +#include "video_core/texture_cache/surface_params.h" +#include "video_core/texture_cache/surface_view.h" + +namespace Core { +class System; +} + +namespace Tegra::Texture { +struct FullTextureInfo; +} + +namespace VideoCore { +class RasterizerInterface; +} + +namespace VideoCommon { + +template +class TextureCache { + static_assert(std::is_trivially_copyable_v); + + using ResultType = std::tuple; + using IntervalMap = boost::icl::interval_map>>; + using IntervalType = typename IntervalMap::interval_type; + +public: + void InvalidateRegion(CacheAddr addr, std::size_t size) { + for (const auto& surface : GetSurfacesInRegion(addr, size)) { + if (!surface->IsRegistered()) { + // Skip duplicates + continue; + } + Unregister(surface); + } + } + + ResultType GetTextureSurface(TExecutionContext exctx, + const Tegra::Texture::FullTextureInfo& config) { + const auto gpu_addr{config.tic.Address()}; + if (!gpu_addr) { + return {{}, exctx}; + } + const auto params{SurfaceParams::CreateForTexture(system, config)}; + return GetSurfaceView(exctx, gpu_addr, params, true); + } + + ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { + const auto& regs{system.GPU().Maxwell3D().regs}; + const auto gpu_addr{regs.zeta.Address()}; + if (!gpu_addr || !regs.zeta_enable) { + return {{}, exctx}; + } + const auto depth_params{SurfaceParams::CreateForDepthBuffer( + system, regs.zeta_width, regs.zeta_height, regs.zeta.format, + regs.zeta.memory_layout.block_width, 
regs.zeta.memory_layout.block_height, + regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; + return GetSurfaceView(exctx, gpu_addr, depth_params, preserve_contents); + } + + ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, + bool preserve_contents) { + ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); + + const auto& regs{system.GPU().Maxwell3D().regs}; + if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || + regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { + return {{}, exctx}; + } + + auto& memory_manager{system.GPU().MemoryManager()}; + const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; + const auto gpu_addr{config.Address() + + config.base_layer * config.layer_stride * sizeof(u32)}; + if (!gpu_addr) { + return {{}, exctx}; + } + + return GetSurfaceView(exctx, gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + preserve_contents); + } + + ResultType GetFermiSurface(TExecutionContext exctx, + const Tegra::Engines::Fermi2D::Regs::Surface& config) { + return GetSurfaceView(exctx, config.Address(), + SurfaceParams::CreateForFermiCopySurface(config), true); + } + + std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { + const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; + return it != registered_surfaces.end() ? 
*it->second.begin() : nullptr; + } + + u64 Tick() { + return ++ticks; + } + +protected: + TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : system{system}, rasterizer{rasterizer} {} + + ~TextureCache() = default; + + virtual ResultType TryFastGetSurfaceView( + TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector>& overlaps) = 0; + + virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; + + void Register(std::shared_ptr surface, GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr) { + surface->Register(gpu_addr, cpu_addr, host_ptr); + registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); + rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); + } + + void Unregister(std::shared_ptr surface) { + registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); + rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); + surface->Unregister(); + } + + std::shared_ptr GetUncachedSurface(const SurfaceParams& params) { + if (const auto surface = TryGetReservedSurface(params); surface) + return surface; + // No reserved surface available, create a new one and reserve it + auto new_surface{CreateSurface(params)}; + ReserveSurface(params, new_surface); + return new_surface; + } + + Core::System& system; + +private: + ResultType GetSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, + const SurfaceParams& params, bool preserve_contents) { + auto& memory_manager{system.GPU().MemoryManager()}; + const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; + DEBUG_ASSERT(cpu_addr); + + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + const auto cache_addr{ToCacheAddr(host_ptr)}; + auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; + if (overlaps.empty()) { + return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, 
host_ptr, params, preserve_contents); + } + + if (overlaps.size() == 1) { + if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { + return {view, exctx}; + } + } + + TView* fast_view; + std::tie(fast_view, exctx) = TryFastGetSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, + params, preserve_contents, overlaps); + + if (!fast_view) { + std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { + return lhs->GetModificationTick() < rhs->GetModificationTick(); + }); + } + + for (const auto& surface : overlaps) { + if (!fast_view) { + // Flush even when we don't care about the contents, to preserve memory not + // written by the new surface. + exctx = FlushSurface(exctx, surface); + } + Unregister(surface); + } + + if (fast_view) { + return {fast_view, exctx}; + } + + return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + } + + ResultType LoadSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr, const SurfaceParams& params, bool preserve_contents) { + const auto new_surface{GetUncachedSurface(params)}; + Register(new_surface, gpu_addr, cpu_addr, host_ptr); + if (preserve_contents) { + exctx = LoadSurface(exctx, new_surface); + } + return {new_surface->GetView(gpu_addr, params), exctx}; + } + + TExecutionContext LoadSurface(TExecutionContext exctx, + const std::shared_ptr& surface) { + surface->LoadBuffer(); + exctx = surface->UploadTexture(exctx); + surface->MarkAsModified(false); + return exctx; + } + + TExecutionContext FlushSurface(TExecutionContext exctx, + const std::shared_ptr& surface) { + if (!surface->IsModified()) { + return exctx; + } + exctx = surface->DownloadTexture(exctx); + surface->FlushBuffer(); + return exctx; + } + + std::vector> GetSurfacesInRegion(CacheAddr cache_addr, + std::size_t size) const { + if (size == 0) { + return {}; + } + const IntervalType interval{cache_addr, cache_addr + size}; + + std::vector> surfaces; + for (auto& pair : 
boost::make_iterator_range(registered_surfaces.equal_range(interval))) { + surfaces.push_back(*pair.second.begin()); + } + return surfaces; + } + + void ReserveSurface(const SurfaceParams& params, std::shared_ptr surface) { + surface_reserve[params].push_back(std::move(surface)); + } + + std::shared_ptr TryGetReservedSurface(const SurfaceParams& params) { + auto search{surface_reserve.find(params)}; + if (search == surface_reserve.end()) { + return {}; + } + for (auto& surface : search->second) { + if (!surface->IsRegistered()) { + return surface; + } + } + return {}; + } + + IntervalType GetSurfaceInterval(std::shared_ptr surface) const { + return IntervalType::right_open(surface->GetCacheAddr(), + surface->GetCacheAddr() + surface->GetSizeInBytes()); + } + + VideoCore::RasterizerInterface& rasterizer; + + u64 ticks{}; + + IntervalMap registered_surfaces; + + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have + /// previously been used. This is to prevent surfaces from being constantly created and + /// destroyed when used with different surface parameters. + std::unordered_map>> surface_reserve; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_contextless.h b/src/video_core/texture_cache/texture_cache_contextless.h new file mode 100644 index 000000000..cd35a9fd4 --- /dev/null +++ b/src/video_core/texture_cache/texture_cache_contextless.h @@ -0,0 +1,93 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "video_core/texture_cache/texture_cache.h" + +namespace VideoCommon { + +struct DummyExecutionContext {}; + +template +class TextureCacheContextless : protected TextureCache { + using Base = TextureCache; + +public: + void InvalidateRegion(CacheAddr addr, std::size_t size) { + Base::InvalidateRegion(addr, size); + } + + TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { + return RemoveContext(Base::GetTextureSurface({}, config)); + } + + TView* GetDepthBufferSurface(bool preserve_contents) { + return RemoveContext(Base::GetDepthBufferSurface({}, preserve_contents)); + } + + TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { + return RemoveContext(Base::GetColorBufferSurface({}, index, preserve_contents)); + } + + TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + return RemoveContext(Base::GetFermiSurface({}, config)); + } + + std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { + return Base::TryFindFramebufferSurface(host_ptr); + } + + u64 Tick() { + return Base::Tick(); + } + +protected: + explicit TextureCacheContextless(Core::System& system, + VideoCore::RasterizerInterface& rasterizer) + : TextureCache{system, rasterizer} {} + + virtual TView* TryFastGetSurfaceView( + GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, + bool preserve_contents, const std::vector>& overlaps) = 0; + +private: + std::tuple TryFastGetSurfaceView( + DummyExecutionContext, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector>& overlaps) { + return {TryFastGetSurfaceView(gpu_addr, cpu_addr, host_ptr, params, preserve_contents, + overlaps), + {}}; + } + + TView* RemoveContext(std::tuple return_value) { + const auto [view, exctx] = return_value; + return view; + } +}; + +template +class SurfaceBaseContextless : public SurfaceBase { +public: + DummyExecutionContext 
DownloadTexture(DummyExecutionContext) { + DownloadTextureImpl(); + return {}; + } + + DummyExecutionContext UploadTexture(DummyExecutionContext) { + UploadTextureImpl(); + return {}; + } + +protected: + explicit SurfaceBaseContextless(TTextureCache& texture_cache, const SurfaceParams& params) + : SurfaceBase{texture_cache, params} {} + + virtual void DownloadTextureImpl() = 0; + + virtual void UploadTextureImpl() = 0; +}; + +} // namespace VideoCommon -- cgit v1.2.3 From 6c410104f4f6953ac37095aa5e65804bf115c026 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 25 Apr 2019 13:41:57 -0300 Subject: texture_cache: Remove execution context copies from the texture cache This is done to simplify the OpenGL implementation, it is needed for Vulkan. --- src/video_core/texture_cache/surface_base.cpp | 8 +- src/video_core/texture_cache/surface_base.h | 8 +- src/video_core/texture_cache/texture_cache.h | 83 ++++++++----------- .../texture_cache/texture_cache_contextless.h | 93 ---------------------- 4 files changed, 41 insertions(+), 151 deletions(-) delete mode 100644 src/video_core/texture_cache/texture_cache_contextless.h (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 8680485b4..d0779b502 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -53,8 +53,8 @@ void SurfaceBaseImpl::LoadBuffer() { ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}", params.GetBlockWidth(), static_cast(params.GetTarget())); for (u32 level = 0; level < params.GetNumLevels(); ++level) { - u8* const buffer{GetStagingBufferLevelData(level)}; - SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, buffer, level); + SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, + GetStagingBufferLevelData(level), level); } } else { ASSERT_MSG(params.GetNumLevels() == 1, "Linear 
mipmap loading is not implemented"); @@ -89,8 +89,8 @@ void SurfaceBaseImpl::FlushBuffer() { ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}", params.GetBlockWidth()); for (u32 level = 0; level < params.GetNumLevels(); ++level) { - u8* const buffer = GetStagingBufferLevelData(level); - SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, buffer, level); + SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, + GetStagingBufferLevelData(level), level); } } else { UNIMPLEMENTED(); diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index d0142a9e6..eed8dc59d 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -89,14 +89,12 @@ private: std::vector staging_buffer; }; -template +template class SurfaceBase : public SurfaceBaseImpl { - static_assert(std::is_trivially_copyable_v); - public: - virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0; + virtual void UploadTexture() = 0; - virtual TExecutionContext DownloadTexture(TExecutionContext exctx) = 0; + virtual void DownloadTexture() = 0; TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) { if (view_addr < GetGpuAddr() || !params.IsFamiliar(view_params)) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fb43fa65e..c5c01957a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -41,11 +40,8 @@ class RasterizerInterface; namespace VideoCommon { -template +template class TextureCache { - static_assert(std::is_trivially_copyable_v); - - using ResultType = std::tuple; using IntervalMap = boost::icl::interval_map>>; using IntervalType = typename IntervalMap::interval_type; @@ -60,37 +56,35 @@ public: } } - ResultType 
GetTextureSurface(TExecutionContext exctx, - const Tegra::Texture::FullTextureInfo& config) { + TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { - return {{}, exctx}; + return {}; } const auto params{SurfaceParams::CreateForTexture(system, config)}; - return GetSurfaceView(exctx, gpu_addr, params, true); + return GetSurfaceView(gpu_addr, params, true); } - ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { + TView* GetDepthBufferSurface(bool preserve_contents) { const auto& regs{system.GPU().Maxwell3D().regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { - return {{}, exctx}; + return {}; } const auto depth_params{SurfaceParams::CreateForDepthBuffer( system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - return GetSurfaceView(exctx, gpu_addr, depth_params, preserve_contents); + return GetSurfaceView(gpu_addr, depth_params, preserve_contents); } - ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, - bool preserve_contents) { + TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); const auto& regs{system.GPU().Maxwell3D().regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - return {{}, exctx}; + return {}; } auto& memory_manager{system.GPU().MemoryManager()}; @@ -98,17 +92,16 @@ public: const auto gpu_addr{config.Address() + config.base_layer * config.layer_stride * sizeof(u32)}; if (!gpu_addr) { - return {{}, exctx}; + return {}; } - return GetSurfaceView(exctx, gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + return GetSurfaceView(gpu_addr, 
SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); } - ResultType GetFermiSurface(TExecutionContext exctx, - const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return GetSurfaceView(exctx, config.Address(), - SurfaceParams::CreateForFermiCopySurface(config), true); + TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + return GetSurfaceView(config.Address(), SurfaceParams::CreateForFermiCopySurface(config), + true); } std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { @@ -126,10 +119,9 @@ protected: ~TextureCache() = default; - virtual ResultType TryFastGetSurfaceView( - TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents, - const std::vector>& overlaps) = 0; + virtual TView* TryFastGetSurfaceView( + GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, + bool preserve_contents, const std::vector>& overlaps) = 0; virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; @@ -158,8 +150,7 @@ protected: Core::System& system; private: - ResultType GetSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, - const SurfaceParams& params, bool preserve_contents) { + TView* GetSurfaceView(GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { auto& memory_manager{system.GPU().MemoryManager()}; const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; DEBUG_ASSERT(cpu_addr); @@ -168,18 +159,17 @@ private: const auto cache_addr{ToCacheAddr(host_ptr)}; auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; if (overlaps.empty()) { - return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } if (overlaps.size() == 1) { if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { - return {view, exctx}; + return view; } } - 
TView* fast_view; - std::tie(fast_view, exctx) = TryFastGetSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, - params, preserve_contents, overlaps); + const auto fast_view{TryFastGetSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, + preserve_contents, overlaps)}; if (!fast_view) { std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { @@ -191,44 +181,39 @@ private: if (!fast_view) { // Flush even when we don't care about the contents, to preserve memory not // written by the new surface. - exctx = FlushSurface(exctx, surface); + FlushSurface(surface); } Unregister(surface); } - if (fast_view) { - return {fast_view, exctx}; + return fast_view; } - return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } - ResultType LoadSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr, const SurfaceParams& params, bool preserve_contents) { + TView* LoadSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents) { const auto new_surface{GetUncachedSurface(params)}; Register(new_surface, gpu_addr, cpu_addr, host_ptr); if (preserve_contents) { - exctx = LoadSurface(exctx, new_surface); + LoadSurface(new_surface); } - return {new_surface->GetView(gpu_addr, params), exctx}; + return new_surface->GetView(gpu_addr, params); } - TExecutionContext LoadSurface(TExecutionContext exctx, - const std::shared_ptr& surface) { + void LoadSurface(const std::shared_ptr& surface) { surface->LoadBuffer(); - exctx = surface->UploadTexture(exctx); + surface->UploadTexture(); surface->MarkAsModified(false); - return exctx; } - TExecutionContext FlushSurface(TExecutionContext exctx, - const std::shared_ptr& surface) { + void FlushSurface(const std::shared_ptr& surface) { if (!surface->IsModified()) { - return exctx; + return; } - exctx = surface->DownloadTexture(exctx); 
+ surface->DownloadTexture(); surface->FlushBuffer(); - return exctx; } std::vector> GetSurfacesInRegion(CacheAddr cache_addr, diff --git a/src/video_core/texture_cache/texture_cache_contextless.h b/src/video_core/texture_cache/texture_cache_contextless.h deleted file mode 100644 index cd35a9fd4..000000000 --- a/src/video_core/texture_cache/texture_cache_contextless.h +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "video_core/texture_cache/texture_cache.h" - -namespace VideoCommon { - -struct DummyExecutionContext {}; - -template -class TextureCacheContextless : protected TextureCache { - using Base = TextureCache; - -public: - void InvalidateRegion(CacheAddr addr, std::size_t size) { - Base::InvalidateRegion(addr, size); - } - - TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { - return RemoveContext(Base::GetTextureSurface({}, config)); - } - - TView* GetDepthBufferSurface(bool preserve_contents) { - return RemoveContext(Base::GetDepthBufferSurface({}, preserve_contents)); - } - - TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { - return RemoveContext(Base::GetColorBufferSurface({}, index, preserve_contents)); - } - - TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return RemoveContext(Base::GetFermiSurface({}, config)); - } - - std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { - return Base::TryFindFramebufferSurface(host_ptr); - } - - u64 Tick() { - return Base::Tick(); - } - -protected: - explicit TextureCacheContextless(Core::System& system, - VideoCore::RasterizerInterface& rasterizer) - : TextureCache{system, rasterizer} {} - - virtual TView* TryFastGetSurfaceView( - GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, const std::vector>& overlaps) = 0; - -private: - 
std::tuple TryFastGetSurfaceView( - DummyExecutionContext, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents, - const std::vector>& overlaps) { - return {TryFastGetSurfaceView(gpu_addr, cpu_addr, host_ptr, params, preserve_contents, - overlaps), - {}}; - } - - TView* RemoveContext(std::tuple return_value) { - const auto [view, exctx] = return_value; - return view; - } -}; - -template -class SurfaceBaseContextless : public SurfaceBase { -public: - DummyExecutionContext DownloadTexture(DummyExecutionContext) { - DownloadTextureImpl(); - return {}; - } - - DummyExecutionContext UploadTexture(DummyExecutionContext) { - UploadTextureImpl(); - return {}; - } - -protected: - explicit SurfaceBaseContextless(TTextureCache& texture_cache, const SurfaceParams& params) - : SurfaceBase{texture_cache, params} {} - - virtual void DownloadTextureImpl() = 0; - - virtual void UploadTextureImpl() = 0; -}; - -} // namespace VideoCommon -- cgit v1.2.3 From 3d471e732d688c20aef73a506bdb6126002d3193 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 10:56:45 -0400 Subject: Correct Surface Base and Views for new Texture Cache --- src/video_core/texture_cache/copy_params.h | 25 ++ src/video_core/texture_cache/surface_base.cpp | 108 +++++---- src/video_core/texture_cache/surface_base.h | 297 ++++++++++++++++++------ src/video_core/texture_cache/surface_params.cpp | 210 ++++++----------- src/video_core/texture_cache/surface_params.h | 159 ++++--------- src/video_core/texture_cache/surface_view.cpp | 12 +- src/video_core/texture_cache/surface_view.h | 35 ++- 7 files changed, 466 insertions(+), 380 deletions(-) create mode 100644 src/video_core/texture_cache/copy_params.h (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h new file mode 100644 index 000000000..75c2b1f05 --- /dev/null +++ b/src/video_core/texture_cache/copy_params.h 
@@ -0,0 +1,25 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace VideoCommon { + +struct CopyParams { + u32 source_x; + u32 source_y; + u32 source_z; + u32 dest_x; + u32 dest_y; + u32 dest_z; + u32 source_level; + u32 dest_level; + u32 width; + u32 height; + u32 depth; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index d0779b502..5273fcb44 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -4,104 +4,120 @@ #include "common/assert.h" #include "common/common_types.h" -#include "video_core/morton.h" +#include "common/microprofile.h" +#include "video_core/memory_manager.h" #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/textures/convert.h" namespace VideoCommon { +MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); + using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; -namespace { -void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, - u32 level) { +SurfaceBaseImpl::SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& params) + : gpu_addr{gpu_vaddr}, params{params}, mipmap_sizes{params.num_levels}, + mipmap_offsets{params.num_levels}, layer_size{params.GetGuestLayerSize()}, + memory_size{params.GetGuestSizeInBytes()}, host_memory_size{params.GetHostSizeInBytes()} { + u32 offset = 0; + mipmap_offsets.resize(params.num_levels); + mipmap_sizes.resize(params.num_levels); + gpu_addr_end = gpu_addr + memory_size; + for (u32 i = 0; i < params.num_levels; i++) { + mipmap_offsets[i] = offset; + 
mipmap_sizes[i] = params.GetGuestMipmapSize(i); + offset += mipmap_sizes[i]; + } +} + +void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, + u8* buffer, u32 level) { const u32 width{params.GetMipWidth(level)}; const u32 height{params.GetMipHeight(level)}; const u32 block_height{params.GetMipBlockHeight(level)}; const u32 block_depth{params.GetMipBlockDepth(level)}; - std::size_t guest_offset{params.GetGuestMipmapLevelOffset(level)}; - if (params.IsLayered()) { + std::size_t guest_offset{mipmap_offsets[level]}; + if (params.is_layered) { std::size_t host_offset{0}; - const std::size_t guest_stride = params.GetGuestLayerSize(); + const std::size_t guest_stride = layer_size; const std::size_t host_stride = params.GetHostLayerSize(level); - for (u32 layer = 0; layer < params.GetNumLayers(); layer++) { - MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, - 1, params.GetTileWidthSpacing(), buffer + host_offset, - memory + guest_offset); + for (u32 layer = 0; layer < params.depth; layer++) { + MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1, + params.tile_width_spacing, buffer + host_offset, memory + guest_offset); guest_offset += guest_stride; host_offset += host_stride; } } else { - MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, - params.GetMipDepth(level), params.GetTileWidthSpacing(), buffer, + MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, + params.GetMipDepth(level), params.tile_width_spacing, buffer, memory + guest_offset); } } -} // Anonymous namespace -SurfaceBaseImpl::SurfaceBaseImpl(const SurfaceParams& params) : params{params} { - staging_buffer.resize(params.GetHostSizeInBytes()); -} - -SurfaceBaseImpl::~SurfaceBaseImpl() = default; - -void SurfaceBaseImpl::LoadBuffer() { - if (params.IsTiled()) { - ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as 
{} on texture target {}", - params.GetBlockWidth(), static_cast(params.GetTarget())); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { +void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, + std::vector& staging_buffer) { + MICROPROFILE_SCOPE(GPU_Load_Texture); + auto host_ptr = memory_manager.GetPointer(gpu_addr); + if (params.is_tiled) { + ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", + params.block_width, static_cast(params.target)); + for (u32 level = 0; level < params.num_levels; ++level) { + const u32 host_offset = params.GetHostMipmapLevelOffset(level); SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, - GetStagingBufferLevelData(level), level); + staging_buffer.data() + host_offset, level); } } else { - ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented"); - const u32 bpp{GetFormatBpp(params.GetPixelFormat()) / CHAR_BIT}; + ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented"); + const u32 bpp{params.GetBytesPerPixel()}; const u32 block_width{params.GetDefaultBlockWidth()}; const u32 block_height{params.GetDefaultBlockHeight()}; - const u32 width{(params.GetWidth() + block_width - 1) / block_width}; - const u32 height{(params.GetHeight() + block_height - 1) / block_height}; + const u32 width{(params.width + block_width - 1) / block_width}; + const u32 height{(params.height + block_height - 1) / block_height}; const u32 copy_size{width * bpp}; - if (params.GetPitch() == copy_size) { + if (params.pitch == copy_size) { std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); } else { const u8* start{host_ptr}; u8* write_to{staging_buffer.data()}; for (u32 h = height; h > 0; --h) { std::memcpy(write_to, start, copy_size); - start += params.GetPitch(); + start += params.pitch; write_to += copy_size; } } } - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - 
ConvertFromGuestToHost(GetStagingBufferLevelData(level), params.GetPixelFormat(), + for (u32 level = 0; level < params.num_levels; ++level) { + const u32 host_offset = params.GetHostMipmapLevelOffset(level); + ConvertFromGuestToHost(staging_buffer.data() + host_offset, params.pixel_format, params.GetMipWidth(level), params.GetMipHeight(level), params.GetMipDepth(level), true, true); } } -void SurfaceBaseImpl::FlushBuffer() { - if (params.IsTiled()) { - ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}", - params.GetBlockWidth()); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, - GetStagingBufferLevelData(level), level); +void SurfaceBaseImpl::FlushBuffer(std::vector& staging_buffer) { + MICROPROFILE_SCOPE(GPU_Flush_Texture); + if (params.is_tiled) { + ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); + for (u32 level = 0; level < params.num_levels; ++level) { + const u32 host_offset = params.GetHostMipmapLevelOffset(level); + SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, + staging_buffer.data() + host_offset, level); } } else { UNIMPLEMENTED(); /* - ASSERT(params.GetTarget() == SurfaceTarget::Texture2D); - ASSERT(params.GetNumLevels() == 1); + ASSERT(params.target == SurfaceTarget::Texture2D); + ASSERT(params.num_levels == 1); const u32 bpp{params.GetFormatBpp() / 8}; - const u32 copy_size{params.GetWidth() * bpp}; - if (params.GetPitch() == copy_size) { - std::memcpy(host_ptr, staging_buffer.data(), GetSizeInBytes()); + const u32 copy_size{params.width * bpp}; + if (params.pitch == copy_size) { + std::memcpy(host_ptr, staging_buffer.data(), memory_size); } else { u8* start{host_ptr}; const u8* read_to{staging_buffer.data()}; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index eed8dc59d..5fd7add0a 100644 --- 
a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -4,166 +4,309 @@ #pragma once +#include #include +#include #include "common/assert.h" #include "common/common_types.h" #include "video_core/gpu.h" +#include "video_core/morton.h" +#include "video_core/texture_cache/copy_params.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" +template> +ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp={}) +{ + // Note: BOTH type T and the type after ForwardIt is dereferenced + // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. + // This is stricter than lower_bound requirement (see above) + + first = std::lower_bound(first, last, value, comp); + return first != last && !comp(value, *first) ? first : last; +} + +namespace Tegra { +class MemoryManager; +} + namespace VideoCommon { +using VideoCore::Surface::SurfaceTarget; +using VideoCore::MortonSwizzleMode; + class SurfaceBaseImpl { public: - void LoadBuffer(); + void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); - void FlushBuffer(); + void FlushBuffer(std::vector& staging_buffer); GPUVAddr GetGpuAddr() const { - ASSERT(is_registered); return gpu_addr; } + GPUVAddr GetGpuAddrEnd() const { + return gpu_addr_end; + } + + bool Overlaps(const GPUVAddr start, const GPUVAddr end) const { + return (gpu_addr < end) && (gpu_addr_end > start); + } + + // Use only when recycling a surface + void SetGpuAddr(const GPUVAddr new_addr) { + gpu_addr = new_addr; + gpu_addr_end = new_addr + memory_size; + } + VAddr GetCpuAddr() const { - ASSERT(is_registered); - return cpu_addr; + return gpu_addr; + } + + void SetCpuAddr(const VAddr new_addr) { + cpu_addr = new_addr; } u8* GetHostPtr() const { - ASSERT(is_registered); return host_ptr; } - CacheAddr GetCacheAddr() const { - ASSERT(is_registered); - return cache_addr; + void SetHostPtr(u8* new_addr) { + 
host_ptr = new_addr; } const SurfaceParams& GetSurfaceParams() const { return params; } - void Register(GPUVAddr gpu_addr_, VAddr cpu_addr_, u8* host_ptr_) { - ASSERT(!is_registered); - is_registered = true; - gpu_addr = gpu_addr_; - cpu_addr = cpu_addr_; - host_ptr = host_ptr_; - cache_addr = ToCacheAddr(host_ptr_); - DecorateSurfaceName(); + std::size_t GetSizeInBytes() const { + return memory_size; } - void Unregister() { - ASSERT(is_registered); - is_registered = false; + std::size_t GetHostSizeInBytes() const { + return host_memory_size; } - bool IsRegistered() const { - return is_registered; + std::size_t GetMipmapSize(const u32 level) const { + return mipmap_sizes[level]; } - std::size_t GetSizeInBytes() const { - return params.GetGuestSizeInBytes(); + bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { + return params.pixel_format == pixel_format; + } + + bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { + return params.target == target; + } + + bool MatchesTopology(const SurfaceParams& rhs) const { + const u32 src_bpp = params.GetBytesPerPixel(); + const u32 dst_bpp = rhs.GetBytesPerPixel(); + return std::tie(src_bpp, params.is_tiled) == std::tie(dst_bpp, rhs.is_tiled); + } + + bool MatchesStructure(const SurfaceParams& rhs) const { + if (params.is_tiled) { + const u32 a_width1 = params.GetBlockAlignedWidth(); + const u32 a_width2 = rhs.GetBlockAlignedWidth(); + return std::tie(a_width1, params.height, params.depth, params.block_width, + params.block_height, params.block_depth, params.tile_width_spacing) == + std::tie(a_width2, rhs.height, rhs.depth, rhs.block_width, rhs.block_height, + rhs.block_depth, rhs.tile_width_spacing); + } else { + return std::tie(params.width, params.height, params.pitch) == + std::tie(rhs.width, rhs.height, rhs.pitch); + } + } + + std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const { + if (candidate_gpu_addr < gpu_addr) + return {}; + const GPUVAddr relative_address = 
candidate_gpu_addr - gpu_addr; + const u32 layer = relative_address / layer_size; + const GPUVAddr mipmap_address = relative_address - layer_size * layer; + const auto mipmap_it = binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + if (mipmap_it != mipmap_offsets.end()) { + return {{layer, std::distance(mipmap_offsets.begin(), mipmap_it)}}; + } + return {}; } - u8* GetStagingBufferLevelData(u32 level) { - return staging_buffer.data() + params.GetHostMipmapLevelOffset(level); + std::vector BreakDown() const { + auto set_up_copy = [](CopyParams& cp, const SurfaceParams& params, const u32 depth, + const u32 level) { + cp.source_x = 0; + cp.source_y = 0; + cp.source_z = 0; + cp.dest_x = 0; + cp.dest_y = 0; + cp.dest_z = 0; + cp.source_level = level; + cp.dest_level = level; + cp.width = params.GetMipWidth(level); + cp.height = params.GetMipHeight(level); + cp.depth = depth; + }; + const u32 layers = params.depth; + const u32 mipmaps = params.num_levels; + if (params.is_layered) { + std::vector result{layers * mipmaps}; + for (std::size_t layer = 0; layer < layers; layer++) { + const u32 layer_offset = layer * mipmaps; + for (std::size_t level = 0; level < mipmaps; level++) { + CopyParams& cp = result[layer_offset + level]; + set_up_copy(cp, params, layer, level); + } + } + return result; + } else { + std::vector result{mipmaps}; + for (std::size_t level = 0; level < mipmaps; level++) { + CopyParams& cp = result[level]; + set_up_copy(cp, params, params.GetMipDepth(level), level); + } + return result; + } } protected: - explicit SurfaceBaseImpl(const SurfaceParams& params); - ~SurfaceBaseImpl(); // non-virtual is intended + explicit SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& params); + ~SurfaceBaseImpl() = default; virtual void DecorateSurfaceName() = 0; const SurfaceParams params; - -private: GPUVAddr gpu_addr{}; - VAddr cpu_addr{}; - u8* host_ptr{}; - CacheAddr cache_addr{}; - bool is_registered{}; + GPUVAddr gpu_addr_end{}; 
+ std::vector mipmap_sizes; + std::vector mipmap_offsets; + const std::size_t layer_size; + const std::size_t memory_size; + const std::size_t host_memory_size; + u8* host_ptr; + VAddr cpu_addr; - std::vector staging_buffer; +private: + void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, + u32 level); }; -template +template class SurfaceBase : public SurfaceBaseImpl { public: - virtual void UploadTexture() = 0; + virtual void UploadTexture(std::vector& staging_buffer) = 0; - virtual void DownloadTexture() = 0; + virtual void DownloadTexture(std::vector& staging_buffer) = 0; - TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) { - if (view_addr < GetGpuAddr() || !params.IsFamiliar(view_params)) { - // It can't be a view if it's in a prior address. - return {}; - } + void MarkAsModified(const bool is_modified_, const u64 tick) { + is_modified = is_modified_ || is_protected; + modification_tick = tick; + } - const auto relative_offset{static_cast(view_addr - GetGpuAddr())}; - const auto it{view_offset_map.find(relative_offset)}; - if (it == view_offset_map.end()) { - // Couldn't find an aligned view. 
- return {}; - } - const auto [layer, level] = it->second; + void MarkAsProtected(const bool is_protected) { + this->is_protected = is_protected; + } - if (!params.IsViewValid(view_params, layer, level)) { - return {}; - } + void MarkAsPicked(const bool is_picked) { + this->is_picked = is_picked; + } - return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels()); + bool IsModified() const { + return is_modified; } - void MarkAsModified(bool is_modified_) { - is_modified = is_modified_; - if (is_modified_) { - modification_tick = texture_cache.Tick(); - } + bool IsProtected() const { + return is_protected; } - TView* GetView(GPUVAddr view_addr, const SurfaceParams& view_params) { - TView* view{TryGetView(view_addr, view_params)}; - ASSERT(view != nullptr); - return view; + bool IsRegistered() const { + return is_registered; } - bool IsModified() const { - return is_modified; + bool IsPicked() const { + return is_picked; + } + + void MarkAsRegistered(bool is_reg) { + is_registered = is_reg; } u64 GetModificationTick() const { return modification_tick; } + TView EmplaceOverview(const SurfaceParams& overview_params) { + ViewParams vp{}; + vp.base_level = 0; + vp.num_levels = params.num_levels; + vp.target = overview_params.target; + if (params.is_layered && !overview_params.is_layered) { + vp.base_layer = 0; + vp.num_layers = 1; + } else { + vp.base_layer = 0; + vp.num_layers = params.depth; + } + return GetView(vp); + } + + std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { + if (view_addr < gpu_addr) + return {}; + if (params.target == SurfaceTarget::Texture3D || view_params.target == SurfaceTarget::Texture3D) { + return {}; + } + const std::size_t size = view_params.GetGuestSizeInBytes(); + const GPUVAddr relative_address = view_addr - gpu_addr; + auto layer_mipmap = GetLayerMipmap(relative_address); + if (!layer_mipmap) { + return {}; + } + const u32 layer = (*layer_mipmap).first; + const u32 mipmap = 
(*layer_mipmap).second; + if (GetMipmapSize(mipmap) != size) { + // TODO: the view may cover many mimaps, this case can still go on + return {}; + } + ViewParams vp{}; + vp.base_layer = layer; + vp.num_layers = 1; + vp.base_level = mipmap; + vp.num_levels = 1; + vp.target = params.target; + return {GetView(vp)}; + } + + TView GetMainView() const { + return main_view; + } + protected: - explicit SurfaceBase(TTextureCache& texture_cache, const SurfaceParams& params) - : SurfaceBaseImpl{params}, texture_cache{texture_cache}, - view_offset_map{params.CreateViewOffsetMap()} {} + explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params) + : SurfaceBaseImpl(gpu_addr, params) {} ~SurfaceBase() = default; - virtual std::unique_ptr CreateView(const ViewKey& view_key) = 0; + virtual TView CreateView(const ViewParams& view_key) = 0; + + std::unordered_map views; + TView main_view; private: - TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) { - const ViewKey key{base_layer, num_layers, base_level, num_levels}; + TView GetView(const ViewParams& key) { const auto [entry, is_cache_miss] = views.try_emplace(key); auto& view{entry->second}; if (is_cache_miss) { view = CreateView(key); } - return view.get(); + return view; } - TTextureCache& texture_cache; - const std::map> view_offset_map; - - std::unordered_map> views; - bool is_modified{}; + bool is_protected{}; + bool is_registered{}; + bool is_picked{}; u64 modification_tick{}; }; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index d1f8c53d5..d9052152c 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -7,6 +7,7 @@ #include "common/cityhash.h" #include "common/alignment.h" #include "core/core.h" +#include "video_core/engines/shader_bytecode.h" #include "video_core/surface.h" #include "video_core/texture_cache/surface_params.h" #include 
"video_core/textures/decoders.h" @@ -22,6 +23,37 @@ using VideoCore::Surface::PixelFormatFromTextureFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceTargetFromTextureType; +SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { + switch (type) { + case Tegra::Shader::TextureType::Texture1D: { + if (is_array) + return SurfaceTarget::Texture1DArray; + else + return SurfaceTarget::Texture1D; + } + case Tegra::Shader::TextureType::Texture2D: { + if (is_array) + return SurfaceTarget::Texture2DArray; + else + return SurfaceTarget::Texture2D; + } + case Tegra::Shader::TextureType::Texture3D: { + ASSERT(!is_array); + return SurfaceTarget::Texture3D; + } + case Tegra::Shader::TextureType::TextureCube: { + if (is_array) + return SurfaceTarget::TextureCubeArray; + else + return SurfaceTarget::TextureCubemap; + } + default: { + UNREACHABLE(); + return SurfaceTarget::Texture2D; + } + } +} + namespace { constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); @@ -29,7 +61,8 @@ constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { } // Anonymous namespace SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, - const Tegra::Texture::FullTextureInfo& config) { + const Tegra::Texture::FullTextureInfo& config, + const VideoCommon::Shader::Sampler& entry) { SurfaceParams params; params.is_tiled = config.tic.IsTiled(); params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); @@ -41,7 +74,8 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.srgb_conversion); params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); - params.target = SurfaceTargetFromTextureType(config.tic.texture_type); + // TODO: on 1DBuffer we should use the tic info. 
+ params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); params.depth = config.tic.Depth(); @@ -52,8 +86,7 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); params.unaligned_height = config.tic.Height(); params.num_levels = config.tic.max_mip_level + 1; - - params.CalculateCachedValues(); + params.is_layered = params.IsLayered(); return params; } @@ -77,8 +110,7 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; - - params.CalculateCachedValues(); + params.is_layered = false; return params; } @@ -108,8 +140,7 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; params.num_levels = 1; - - params.CalculateCachedValues(); + params.is_layered = false; return params; } @@ -128,13 +159,13 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.type = GetFormatType(params.pixel_format); params.width = config.width; params.height = config.height; + params.pitch = config.pitch; params.unaligned_height = config.height; // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; - - params.CalculateCachedValues(); + params.is_layered = params.IsLayered(); return params; } @@ -147,7 +178,7 @@ u32 SurfaceParams::GetMipHeight(u32 level) const { } u32 SurfaceParams::GetMipDepth(u32 level) const { - return IsLayered() ? depth : std::max(1U, depth >> level); + return is_layered ? 
depth : std::max(1U, depth >> level); } bool SurfaceParams::IsLayered() const { @@ -183,7 +214,7 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { if (level == 0) { return this->block_depth; } - if (IsLayered()) { + if (is_layered) { return 1; } @@ -216,6 +247,10 @@ std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { return offset; } +std::size_t SurfaceParams::GetGuestMipmapSize(u32 level) const { + return GetInnerMipmapMemorySize(level, false, false); +} + std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); } @@ -229,7 +264,7 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co for (u32 level = 0; level < num_levels; ++level) { size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); } - if (is_tiled && (IsLayered() || target == SurfaceTarget::Texture3D)) { + if (is_tiled && is_layered) { return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); } return size; @@ -256,150 +291,32 @@ u32 SurfaceParams::GetBytesPerPixel() const { return VideoCore::Surface::GetBytesPerPixel(pixel_format); } -bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const { - if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) != - std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format, - view_params.component_type, view_params.type)) { - return false; - } - - const SurfaceTarget view_target{view_params.target}; - if (view_target == target) { - return true; - } - - switch (target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture3D: - return false; - case SurfaceTarget::Texture1DArray: - return view_target == SurfaceTarget::Texture1D; - case SurfaceTarget::Texture2DArray: - return view_target == SurfaceTarget::Texture2D; - case SurfaceTarget::TextureCubemap: - return view_target == 
SurfaceTarget::Texture2D || - view_target == SurfaceTarget::Texture2DArray; - case SurfaceTarget::TextureCubeArray: - return view_target == SurfaceTarget::Texture2D || - view_target == SurfaceTarget::Texture2DArray || - view_target == SurfaceTarget::TextureCubemap; - default: - UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast(target)); - return false; - } -} - bool SurfaceParams::IsPixelFormatZeta() const { return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; } -void SurfaceParams::CalculateCachedValues() { - switch (target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture3D: - num_layers = 1; - break; - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - num_layers = depth; - break; - default: - UNREACHABLE(); - } - - guest_size_in_bytes = GetInnerMemorySize(false, false, false); - - if (IsPixelFormatASTC(pixel_format)) { - // ASTC is uncompressed in software, in emulated as RGBA8 - host_size_in_bytes = static_cast(width) * static_cast(height) * - static_cast(depth) * 4ULL; - } else { - host_size_in_bytes = GetInnerMemorySize(true, false, false); - } -} - std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const { const bool tiled{as_host_size ? false : is_tiled}; const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; - const u32 depth{target == SurfaceTarget::Texture3D ? GetMipDepth(level) : 1U}; + const u32 depth{is_layered ? 
1U : GetMipDepth(level)}; return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, GetMipBlockHeight(level), GetMipBlockDepth(level)); } std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { - return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : num_layers); -} - -std::map> SurfaceParams::CreateViewOffsetMap() const { - std::map> view_offset_map; - switch (target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture3D: { - // TODO(Rodrigo): Add layer iterations for 3D textures - constexpr u32 layer = 0; - for (u32 level = 0; level < num_levels; ++level) { - const std::size_t offset{GetGuestMipmapLevelOffset(level)}; - view_offset_map.insert({offset, {layer, level}}); - } - break; - } - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: { - const std::size_t layer_size{GetGuestLayerSize()}; - for (u32 level = 0; level < num_levels; ++level) { - const std::size_t level_offset{GetGuestMipmapLevelOffset(level)}; - for (u32 layer = 0; layer < num_layers; ++layer) { - const auto layer_offset{static_cast(layer_size * layer)}; - const std::size_t offset{level_offset + layer_offset}; - view_offset_map.insert({offset, {layer, level}}); - } - } - break; - } - default: - UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast(target)); - } - return view_offset_map; -} - -bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const { - return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) && - IsInBounds(view_params, layer, level); + return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 
1U : depth); } -bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const { - return view_params.width == GetMipWidth(level) && view_params.height == GetMipHeight(level); -} - -bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const { - if (view_params.target != SurfaceTarget::Texture3D) { - return true; - } - return view_params.depth == GetMipDepth(level); -} - -bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const { - return layer + view_params.num_layers <= num_layers && - level + view_params.num_levels <= num_levels; -} - -std::size_t HasheableSurfaceParams::Hash() const { +std::size_t SurfaceParams::Hash() const { return static_cast( Common::CityHash64(reinterpret_cast(this), sizeof(*this))); } -bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const { +bool SurfaceParams::operator==(const SurfaceParams& rhs) const { return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, height, depth, pitch, unaligned_height, num_levels, pixel_format, component_type, type, target) == @@ -409,4 +326,27 @@ bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const rhs.type, rhs.target); } +std::string SurfaceParams::TargetName() const { + switch (target) { + case SurfaceTarget::Texture1D: + return "1D"; + case SurfaceTarget::Texture2D: + return "2D"; + case SurfaceTarget::Texture3D: + return "3D"; + case SurfaceTarget::Texture1DArray: + return "1DArray"; + case SurfaceTarget::Texture2DArray: + return "2DArray"; + case SurfaceTarget::TextureCubemap: + return "Cube"; + case SurfaceTarget::TextureCubeArray: + return "CubeArray"; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast(target)); + UNREACHABLE(); + return fmt::format("TUK({})", static_cast(target)); + } +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.h 
b/src/video_core/texture_cache/surface_params.h index 77dc0ba66..ec8efa210 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -6,50 +6,21 @@ #include +#include "common/alignment.h" #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" +#include "video_core/shader/shader_ir.h" namespace VideoCommon { -class HasheableSurfaceParams { -public: - std::size_t Hash() const; - - bool operator==(const HasheableSurfaceParams& rhs) const; - - bool operator!=(const HasheableSurfaceParams& rhs) const { - return !operator==(rhs); - } - -protected: - // Avoid creation outside of a managed environment. - HasheableSurfaceParams() = default; - - bool is_tiled; - bool srgb_conversion; - u32 block_width; - u32 block_height; - u32 block_depth; - u32 tile_width_spacing; - u32 width; - u32 height; - u32 depth; - u32 pitch; - u32 unaligned_height; - u32 num_levels; - VideoCore::Surface::PixelFormat pixel_format; - VideoCore::Surface::ComponentType component_type; - VideoCore::Surface::SurfaceType type; - VideoCore::Surface::SurfaceTarget target; -}; - -class SurfaceParams final : public HasheableSurfaceParams { +class SurfaceParams { public: /// Creates SurfaceCachedParams from a texture configuration. static SurfaceParams CreateForTexture(Core::System& system, - const Tegra::Texture::FullTextureInfo& config); + const Tegra::Texture::FullTextureInfo& config, + const VideoCommon::Shader::Sampler& entry); /// Creates SurfaceCachedParams for a depth buffer configuration. 
static SurfaceParams CreateForDepthBuffer( @@ -64,68 +35,33 @@ public: static SurfaceParams CreateForFermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& config); - bool IsTiled() const { - return is_tiled; - } - - bool GetSrgbConversion() const { - return srgb_conversion; - } - - u32 GetBlockWidth() const { - return block_width; - } - - u32 GetTileWidthSpacing() const { - return tile_width_spacing; - } - - u32 GetWidth() const { - return width; - } - - u32 GetHeight() const { - return height; - } - - u32 GetDepth() const { - return depth; - } - - u32 GetPitch() const { - return pitch; - } - - u32 GetNumLevels() const { - return num_levels; - } - - VideoCore::Surface::PixelFormat GetPixelFormat() const { - return pixel_format; - } - - VideoCore::Surface::ComponentType GetComponentType() const { - return component_type; - } + std::size_t Hash() const; - VideoCore::Surface::SurfaceTarget GetTarget() const { - return target; - } + bool operator==(const SurfaceParams& rhs) const; - VideoCore::Surface::SurfaceType GetType() const { - return type; + bool operator!=(const SurfaceParams& rhs) const { + return !operator==(rhs); } std::size_t GetGuestSizeInBytes() const { - return guest_size_in_bytes; + return GetInnerMemorySize(false, false, false); } std::size_t GetHostSizeInBytes() const { + std::size_t host_size_in_bytes; + if (IsPixelFormatASTC(pixel_format)) { + // ASTC is uncompressed in software, in emulated as RGBA8 + host_size_in_bytes = static_cast(Common::AlignUp(width, GetDefaultBlockWidth())) * + static_cast(Common::AlignUp(height, GetDefaultBlockHeight())) * + static_cast(depth) * 4ULL; + } else { + host_size_in_bytes = GetInnerMemorySize(true, false, false); + } return host_size_in_bytes; } - u32 GetNumLayers() const { - return num_layers; + u32 GetBlockAlignedWidth() const { + return Common::AlignUp(width, 64 / GetBytesPerPixel()); } /// Returns the width of a given mipmap level. 
@@ -137,9 +73,6 @@ public: /// Returns the depth of a given mipmap level. u32 GetMipDepth(u32 level) const; - /// Returns true if these parameters are from a layered surface. - bool IsLayered() const; - /// Returns the block height of a given mipmap level. u32 GetMipBlockHeight(u32 level) const; @@ -152,6 +85,9 @@ public: /// Returns the offset in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapLevelOffset(u32 level) const; + /// Returns the size in bytes in guest memory of a given mipmap level. + std::size_t GetGuestMipmapSize(u32 level) const; + /// Returns the size in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapSize(u32 level) const; @@ -173,24 +109,30 @@ public: /// Returns the bytes per pixel. u32 GetBytesPerPixel() const; - /// Returns true if another surface can be familiar with this. This is a loosely defined term - /// that reflects the possibility of these two surface parameters potentially being part of a - /// bigger superset. - bool IsFamiliar(const SurfaceParams& view_params) const; - /// Returns true if the pixel format is a depth and/or stencil format. bool IsPixelFormatZeta() const; - /// Creates a map that redirects an address difference to a layer and mipmap level. - std::map> CreateViewOffsetMap() const; + std::string TargetName() const; - /// Returns true if the passed surface view parameters is equal or a valid subset of this. 
- bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const; + bool is_tiled; + bool srgb_conversion; + bool is_layered; + u32 block_width; + u32 block_height; + u32 block_depth; + u32 tile_width_spacing; + u32 width; + u32 height; + u32 depth; + u32 pitch; + u32 unaligned_height; + u32 num_levels; + VideoCore::Surface::PixelFormat pixel_format; + VideoCore::Surface::ComponentType component_type; + VideoCore::Surface::SurfaceType type; + VideoCore::Surface::SurfaceTarget target; private: - /// Calculates values that can be deduced from HasheableSurfaceParams. - void CalculateCachedValues(); - /// Returns the size of a given mipmap level inside a layer. std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; @@ -200,19 +142,12 @@ private: /// Returns the size of a layer std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; - /// Returns true if the passed view width and height match the size of this params in a given - /// mipmap level. - bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const; - - /// Returns true if the passed view depth match the size of this params in a given mipmap level. - bool IsDepthValid(const SurfaceParams& view_params, u32 level) const; - - /// Returns true if the passed view layers and mipmap levels are in bounds. - bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const; + std::size_t GetNumLayers() const { + return is_layered ? depth : 1; + } - std::size_t guest_size_in_bytes; - std::size_t host_size_in_bytes; - u32 num_layers; + /// Returns true if these parameters are from a layered surface. 
+ bool IsLayered() const; }; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp index 5f4cdbb1c..467696a4c 100644 --- a/src/video_core/texture_cache/surface_view.cpp +++ b/src/video_core/texture_cache/surface_view.cpp @@ -9,15 +9,15 @@ namespace VideoCommon { -std::size_t ViewKey::Hash() const { +std::size_t ViewParams::Hash() const { return static_cast(base_layer) ^ static_cast(num_layers << 16) ^ - (static_cast(base_level) << 32) ^ - (static_cast(num_levels) << 48); + (static_cast(base_level) << 24) ^ + (static_cast(num_levels) << 32) ^ (static_cast(target) << 36); } -bool ViewKey::operator==(const ViewKey& rhs) const { - return std::tie(base_layer, num_layers, base_level, num_levels) == - std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels); +bool ViewParams::operator==(const ViewParams& rhs) const { + return std::tie(base_layer, num_layers, base_level, num_levels, target) == + std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index e73d8f6ae..c122800a6 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -7,18 +7,45 @@ #include #include "common/common_types.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/surface_params.h" namespace VideoCommon { -struct ViewKey { +struct ViewParams { std::size_t Hash() const; - bool operator==(const ViewKey& rhs) const; + bool operator==(const ViewParams& rhs) const; u32 base_layer{}; u32 num_layers{}; u32 base_level{}; u32 num_levels{}; + VideoCore::Surface::SurfaceTarget target; + bool IsLayered() const { + switch (target) { + case VideoCore::Surface::SurfaceTarget::Texture1DArray: + case VideoCore::Surface::SurfaceTarget::Texture2DArray: + case 
VideoCore::Surface::SurfaceTarget::TextureCubemap: + case VideoCore::Surface::SurfaceTarget::TextureCubeArray: + return true; + default: + return false; + } + } +}; + +class ViewBase { +public: + ViewBase(const ViewParams& params) : params{params} {} + ~ViewBase() = default; + + const ViewParams& GetViewParams() const { + return params; + } + +protected: + ViewParams params; }; } // namespace VideoCommon @@ -26,8 +53,8 @@ struct ViewKey { namespace std { template <> -struct hash { - std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept { +struct hash { + std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept { return k.Hash(); } }; -- cgit v1.2.3 From bc930754cc9437ddd86e7d246b3eb4302540896a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 10:57:16 -0400 Subject: Implement Texture Cache V2 --- src/video_core/texture_cache/texture_cache.h | 454 +++++++++++++++++++++------ 1 file changed, 366 insertions(+), 88 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c5c01957a..eb0d9bc10 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -22,6 +22,7 @@ #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" +#include "video_core/texture_cache/copy_params.h" #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" @@ -40,32 +41,42 @@ class RasterizerInterface; namespace VideoCommon { +using VideoCore::Surface::SurfaceTarget; +using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; + template class TextureCache { - using IntervalMap = boost::icl::interval_map>>; + using IntervalMap = boost::icl::interval_map>; using IntervalType = typename IntervalMap::interval_type; 
public: + void InitMemoryMananger(Tegra::MemoryManager& memory_manager) { + this->memory_manager = &memory_manager; + } + void InvalidateRegion(CacheAddr addr, std::size_t size) { for (const auto& surface : GetSurfacesInRegion(addr, size)) { - if (!surface->IsRegistered()) { - // Skip duplicates - continue; - } Unregister(surface); } } - TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { + void InvalidateRegionEx(GPUVAddr addr, std::size_t size) { + for (const auto& surface : GetSurfacesInRegionInner(addr, size)) { + Unregister(surface); + } + } + + TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, + const VideoCommon::Shader::Sampler& entry) { const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { return {}; } - const auto params{SurfaceParams::CreateForTexture(system, config)}; - return GetSurfaceView(gpu_addr, params, true); + const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; + return GetSurface(gpu_addr, params, true).second; } - TView* GetDepthBufferSurface(bool preserve_contents) { + TView GetDepthBufferSurface(bool preserve_contents) { const auto& regs{system.GPU().Maxwell3D().regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { @@ -75,36 +86,75 @@ public: system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - return GetSurfaceView(gpu_addr, depth_params, preserve_contents); + auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); + if (depth_buffer.target) + depth_buffer.target->MarkAsProtected(false); + if (depth_buffer.target) + depth_buffer.target->MarkAsProtected(true); + return surface_view.second; } - TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { + TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < 
Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); const auto& regs{system.GPU().Maxwell3D().regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { + SetEmptyColorBuffer(index); return {}; } - auto& memory_manager{system.GPU().MemoryManager()}; - const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; - const auto gpu_addr{config.Address() + - config.base_layer * config.layer_stride * sizeof(u32)}; + const auto& config{regs.rt[index]}; + const auto gpu_addr{config.Address()}; if (!gpu_addr) { + SetEmptyColorBuffer(index); return {}; } - return GetSurfaceView(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), - preserve_contents); + auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + preserve_contents); + if (render_targets[index].target) + render_targets[index].target->MarkAsProtected(false); + render_targets[index].target = surface_view.first; + if (render_targets[index].target) + render_targets[index].target->MarkAsProtected(true); + return surface_view.second; + } + + void MarkColorBufferInUse(std::size_t index) { + if (render_targets[index].target) + render_targets[index].target->MarkAsModified(true, Tick()); } - TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return GetSurfaceView(config.Address(), SurfaceParams::CreateForFermiCopySurface(config), - true); + void MarkDepthBufferInUse() { + if (depth_buffer.target) + depth_buffer.target->MarkAsModified(true, Tick()); } - std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { + void SetEmptyDepthBuffer() { + if (depth_buffer.target != nullptr) { + depth_buffer.target->MarkAsProtected(false); + depth_buffer.target = nullptr; + depth_buffer.view = nullptr; + } + } + + void SetEmptyColorBuffer(std::size_t index) { + if (render_targets[index].target != nullptr) { + render_targets[index].target->MarkAsProtected(false); + 
std::memset(&render_targets[index].config, sizeof(RenderTargetConfig), 0); + render_targets[index].target = nullptr; + render_targets[index].view = nullptr; + } + } + + TView GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); + const GPUVAddr gpu_addr = config.Address(); + return GetSurface(gpu_addr, params, true).second; + } + + TSurface TryFindFramebufferSurface(const u8* host_ptr) const { const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; return it != registered_surfaces.end() ? *it->second.begin() : nullptr; } @@ -115,126 +165,334 @@ public: protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : system{system}, rasterizer{rasterizer} {} + : system{system}, rasterizer{rasterizer} { + for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { + SetEmptyColorBuffer(i); + } + SetEmptyDepthBuffer(); + } ~TextureCache() = default; - virtual TView* TryFastGetSurfaceView( - GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, const std::vector>& overlaps) = 0; + virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; - virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; + virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, + const CopyParams& copy_params) = 0; - void Register(std::shared_ptr surface, GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr) { - surface->Register(gpu_addr, cpu_addr, host_ptr); - registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); - rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); + void Register(TSurface surface) { + const GPUVAddr gpu_addr = surface->GetGpuAddr(); + u8* host_ptr = memory_manager->GetPointer(gpu_addr); + const std::size_t size = surface->GetSizeInBytes(); + const std::optional cpu_addr = 
memory_manager->GpuToCpuAddress(gpu_addr); + if (!host_ptr || !cpu_addr) { + LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", + gpu_addr); + return; + } + surface->SetHostPtr(host_ptr); + surface->SetCpuAddr(*cpu_addr); + registered_surfaces.add({GetInterval(host_ptr, size), {surface}}); + rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); + RegisterInnerCache(surface); + surface->MarkAsRegistered(true); } - void Unregister(std::shared_ptr surface) { - registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); - rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); - surface->Unregister(); + void Unregister(TSurface surface) { + if (surface->IsProtected()) + return; + const GPUVAddr gpu_addr = surface->GetGpuAddr(); + const void* host_ptr = surface->GetHostPtr(); + const std::size_t size = surface->GetSizeInBytes(); + const VAddr cpu_addr = surface->GetCpuAddr(); + registered_surfaces.erase(GetInterval(host_ptr, size)); + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + UnregisterInnerCache(surface); + surface->MarkAsRegistered(false); + ReserveSurface(surface->GetSurfaceParams(), surface); } - std::shared_ptr GetUncachedSurface(const SurfaceParams& params) { - if (const auto surface = TryGetReservedSurface(params); surface) + TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { + if (const auto surface = TryGetReservedSurface(params); surface) { + surface->SetGpuAddr(gpu_addr); return surface; + } // No reserved surface available, create a new one and reserve it - auto new_surface{CreateSurface(params)}; - ReserveSurface(params, new_surface); + auto new_surface{CreateSurface(gpu_addr, params)}; return new_surface; } Core::System& system; private: - TView* GetSurfaceView(GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { - auto& memory_manager{system.GPU().MemoryManager()}; - const auto 
cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; - DEBUG_ASSERT(cpu_addr); - - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; - if (overlaps.empty()) { - return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + enum class RecycleStrategy : u32 { + Ignore = 0, + Flush = 1, + BufferCopy = 3, + }; + + RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, + const GPUVAddr gpu_addr, const bool untopological) { + // Untopological decision + if (untopological) { + return RecycleStrategy::Ignore; + } + // 3D Textures decision + if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { + return RecycleStrategy::Flush; } + for (auto s : overlaps) { + const auto& s_params = s->GetSurfaceParams(); + if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { + return RecycleStrategy::Flush; + } + } + return RecycleStrategy::Ignore; + } - if (overlaps.size() == 1) { - if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { - return view; + std::pair RecycleSurface(std::vector& overlaps, + const SurfaceParams& params, const GPUVAddr gpu_addr, + const u8* host_ptr, const bool preserve_contents, + const bool untopological) { + for (auto surface : overlaps) { + Unregister(surface); + } + RecycleStrategy strategy = !Settings::values.use_accurate_gpu_emulation + ? 
PickStrategy(overlaps, params, gpu_addr, untopological) + : RecycleStrategy::Flush; + switch (strategy) { + case RecycleStrategy::Ignore: { + return InitializeSurface(gpu_addr, params, preserve_contents); + } + case RecycleStrategy::Flush: { + std::sort(overlaps.begin(), overlaps.end(), + [](const TSurface& a, const TSurface& b) -> bool { + return a->GetModificationTick() < b->GetModificationTick(); + }); + for (auto surface : overlaps) { + FlushSurface(surface); } + return InitializeSurface(gpu_addr, params, preserve_contents); } + default: { + UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); + return InitializeSurface(gpu_addr, params, preserve_contents); + } + } + } - const auto fast_view{TryFastGetSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, - preserve_contents, overlaps)}; + std::pair RebuildMirage(TSurface current_surface, + const SurfaceParams& params) { + const auto gpu_addr = current_surface->GetGpuAddr(); + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + std::vector bricks = current_surface->BreakDown(); + for (auto& brick : bricks) { + ImageCopy(current_surface, new_surface, brick); + } + Unregister(current_surface); + Register(new_surface); + return {new_surface, new_surface->GetMainView()}; + } - if (!fast_view) { - std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { - return lhs->GetModificationTick() < rhs->GetModificationTick(); - }); + std::pair ManageStructuralMatch(TSurface current_surface, + const SurfaceParams& params) { + const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + if (is_mirage) { + return RebuildMirage(current_surface, params); } + const bool matches_target = current_surface->MatchTarget(params.target); + if (matches_target) { + return {current_surface, current_surface->GetMainView()}; + } + return {current_surface, current_surface->EmplaceOverview(params)}; + } - for (const auto& surface : overlaps) { - if (!fast_view) { - // Flush even 
when we don't care about the contents, to preserve memory not - // written by the new surface. - FlushSurface(surface); + std::optional> ReconstructSurface(std::vector& overlaps, + const SurfaceParams& params, + const GPUVAddr gpu_addr, + const u8* host_ptr) { + if (!params.is_layered || params.target == SurfaceTarget::Texture3D) { + return {}; + } + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + for (auto surface : overlaps) { + const SurfaceParams& src_params = surface->GetSurfaceParams(); + if (src_params.is_layered || src_params.num_levels > 1) { + // We send this cases to recycle as they are more complex to handle + return {}; + } + const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); + auto mipmap_layer = new_surface->GetLayerMipmap(surface->GetGpuAddr()); + if (!mipmap_layer) { + return {}; } + const u32 layer = (*mipmap_layer).first; + const u32 mipmap = (*mipmap_layer).second; + if (new_surface->GetMipmapSize(mipmap) != candidate_size) { + return {}; + } + // Now we got all the data set up + CopyParams copy_params{}; + const u32 dst_width = params.GetMipWidth(mipmap); + const u32 dst_height = params.GetMipHeight(mipmap); + copy_params.width = std::min(src_params.width, dst_width); + copy_params.height = std::min(src_params.height, dst_height); + copy_params.depth = 1; + copy_params.source_level = 0; + copy_params.dest_level = mipmap; + copy_params.source_z = 0; + copy_params.dest_z = layer; + ImageCopy(surface, new_surface, copy_params); + } + for (auto surface : overlaps) { Unregister(surface); } - if (fast_view) { - return fast_view; + Register(new_surface); + return {{new_surface, new_surface->GetMainView()}}; + } + + std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, + bool preserve_contents) { + + const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; + const auto cache_addr{ToCacheAddr(host_ptr)}; + const std::size_t candidate_size = params.GetGuestSizeInBytes(); + auto 
overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)}; + if (overlaps.empty()) { + return InitializeSurface(gpu_addr, params, preserve_contents); + } + + for (auto surface : overlaps) { + if (!surface->MatchesTopology(params)) { + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + true); + } } - return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + if (overlaps.size() == 1) { + TSurface current_surface = overlaps[0]; + if (current_surface->MatchesStructure(params) && + current_surface->GetGpuAddr() == gpu_addr && + (params.target != SurfaceTarget::Texture3D || + current_surface->MatchTarget(params.target))) { + return ManageStructuralMatch(current_surface, params); + } + if (current_surface->GetSizeInBytes() <= candidate_size) { + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + false); + } + std::optional view = current_surface->EmplaceView(params, gpu_addr); + if (view.has_value()) { + const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + if (is_mirage) { + LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + false); + } + return {current_surface, *view}; + } + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + } else { + std::optional> view = + ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + if (view.has_value()) { + return *view; + } + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + } } - TView* LoadSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents) { - const auto new_surface{GetUncachedSurface(params)}; - Register(new_surface, gpu_addr, cpu_addr, host_ptr); + std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, + bool preserve_contents) { + auto new_surface{GetUncachedSurface(gpu_addr, 
params)}; + Register(new_surface); if (preserve_contents) { LoadSurface(new_surface); } - return new_surface->GetView(gpu_addr, params); + return {new_surface, new_surface->GetMainView()}; } - void LoadSurface(const std::shared_ptr& surface) { - surface->LoadBuffer(); - surface->UploadTexture(); - surface->MarkAsModified(false); + void LoadSurface(const TSurface& surface) { + staging_buffer.resize(surface->GetHostSizeInBytes()); + surface->LoadBuffer(*memory_manager, staging_buffer); + surface->UploadTexture(staging_buffer); + surface->MarkAsModified(false, Tick()); } - void FlushSurface(const std::shared_ptr& surface) { + void FlushSurface(const TSurface& surface) { if (!surface->IsModified()) { return; } - surface->DownloadTexture(); - surface->FlushBuffer(); + staging_buffer.resize(surface->GetHostSizeInBytes()); + surface->DownloadTexture(staging_buffer); + surface->FlushBuffer(staging_buffer); + surface->MarkAsModified(false, Tick()); } - std::vector> GetSurfacesInRegion(CacheAddr cache_addr, - std::size_t size) const { + std::vector GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { if (size == 0) { return {}; } const IntervalType interval{cache_addr, cache_addr + size}; - std::vector> surfaces; + std::vector surfaces; for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { - surfaces.push_back(*pair.second.begin()); + for (auto& s : pair.second) { + if (!s || !s->IsRegistered()) { + continue; + } + surfaces.push_back(s); + } } return surfaces; } - void ReserveSurface(const SurfaceParams& params, std::shared_ptr surface) { + void RegisterInnerCache(TSurface& surface) { + GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; + const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + while (start <= end) { + inner_cache[start].push_back(surface); + start++; + } + } + + void UnregisterInnerCache(TSurface& surface) { + GPUVAddr start = surface->GetGpuAddr() >> 
inner_cache_page_bits; + const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + while (start <= end) { + inner_cache[start].remove(surface); + start++; + } + } + + std::vector GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) { + if (size == 0) { + return {}; + } + const GPUVAddr gpu_addr_end = gpu_addr + size; + GPUVAddr start = gpu_addr >> inner_cache_page_bits; + const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits; + std::vector surfaces; + while (start <= end) { + std::list& list = inner_cache[start]; + for (auto& s : list) { + if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) { + s->MarkAsPicked(true); + surfaces.push_back(s); + } + } + start++; + } + for (auto& s : surfaces) { + s->MarkAsPicked(false); + } + return surfaces; + } + + void ReserveSurface(const SurfaceParams& params, TSurface surface) { surface_reserve[params].push_back(std::move(surface)); } - std::shared_ptr TryGetReservedSurface(const SurfaceParams& params) { + TSurface TryGetReservedSurface(const SurfaceParams& params) { auto search{surface_reserve.find(params)}; if (search == surface_reserve.end()) { return {}; @@ -247,21 +505,41 @@ private: return {}; } - IntervalType GetSurfaceInterval(std::shared_ptr surface) const { - return IntervalType::right_open(surface->GetCacheAddr(), - surface->GetCacheAddr() + surface->GetSizeInBytes()); + IntervalType GetInterval(const void* host_ptr, const std::size_t size) const { + const CacheAddr addr = ToCacheAddr(host_ptr); + return IntervalType::right_open(addr, addr + size); } + struct RenderInfo { + RenderTargetConfig config; + TSurface target; + TView view; + }; + + struct DepthBufferInfo { + TSurface target; + TView view; + }; + VideoCore::RasterizerInterface& rasterizer; + Tegra::MemoryManager* memory_manager; u64 ticks{}; IntervalMap registered_surfaces; + static constexpr u64 inner_cache_page_bits{20}; + static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits}; + 
std::unordered_map> inner_cache; + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. - std::unordered_map>> surface_reserve; + std::unordered_map> surface_reserve; + std::array render_targets; + DepthBufferInfo depth_buffer; + + std::vector staging_buffer; }; } // namespace VideoCommon -- cgit v1.2.3 From b711cdce782ee604edc3c52628eb76e6b9a08b72 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 13:58:37 -0400 Subject: Corrections to Structural Matching The texture will now be reconstructed if the width only matches on GoB alignment. --- src/video_core/texture_cache/surface_base.h | 66 +++++++++++++++++++--------- src/video_core/texture_cache/texture_cache.h | 11 +++-- 2 files changed, 53 insertions(+), 24 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 5fd7add0a..9c048eb88 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -16,9 +16,8 @@ #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" -template> -ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp={}) -{ +template > +ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { // Note: BOTH type T and the type after ForwardIt is dereferenced // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. 
// This is stricter than lower_bound requirement (see above) @@ -33,8 +32,14 @@ class MemoryManager; namespace VideoCommon { -using VideoCore::Surface::SurfaceTarget; using VideoCore::MortonSwizzleMode; +using VideoCore::Surface::SurfaceTarget; + +enum class MatchStructureResult : u32 { + FullMatch = 0, + SemiMatch = 1, + None = 2, +}; class SurfaceBaseImpl { public: @@ -106,17 +111,26 @@ public: return std::tie(src_bpp, params.is_tiled) == std::tie(dst_bpp, rhs.is_tiled); } - bool MatchesStructure(const SurfaceParams& rhs) const { + MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { if (params.is_tiled) { - const u32 a_width1 = params.GetBlockAlignedWidth(); - const u32 a_width2 = rhs.GetBlockAlignedWidth(); - return std::tie(a_width1, params.height, params.depth, params.block_width, - params.block_height, params.block_depth, params.tile_width_spacing) == - std::tie(a_width2, rhs.height, rhs.depth, rhs.block_width, rhs.block_height, - rhs.block_depth, rhs.tile_width_spacing); + if (std::tie(params.height, params.depth, params.block_width, params.block_height, + params.block_depth, params.tile_width_spacing) == + std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, + rhs.tile_width_spacing)) { + if (params.width == rhs.width) { + return MatchStructureResult::FullMatch; + } + if (params.GetBlockAlignedWidth() == rhs.GetBlockAlignedWidth()) { + return MatchStructureResult::SemiMatch; + } + } + return MatchStructureResult::None; } else { - return std::tie(params.width, params.height, params.pitch) == - std::tie(rhs.width, rhs.height, rhs.pitch); + if (std::tie(params.width, params.height, params.pitch) == + std::tie(rhs.width, rhs.height, rhs.pitch)) { + return MatchStructureResult::FullMatch; + } + return MatchStructureResult::None; } } @@ -126,15 +140,16 @@ public: const GPUVAddr relative_address = candidate_gpu_addr - gpu_addr; const u32 layer = relative_address / layer_size; const GPUVAddr mipmap_address = 
relative_address - layer_size * layer; - const auto mipmap_it = binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + const auto mipmap_it = + binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); if (mipmap_it != mipmap_offsets.end()) { return {{layer, std::distance(mipmap_offsets.begin(), mipmap_it)}}; } return {}; } - std::vector BreakDown() const { - auto set_up_copy = [](CopyParams& cp, const SurfaceParams& params, const u32 depth, + std::vector BreakDown(const SurfaceParams& in_params) const { + auto set_up_copy = [](CopyParams& cp, const u32 width, const u32 height, const u32 depth, const u32 level) { cp.source_x = 0; cp.source_y = 0; @@ -144,8 +159,8 @@ public: cp.dest_z = 0; cp.source_level = level; cp.dest_level = level; - cp.width = params.GetMipWidth(level); - cp.height = params.GetMipHeight(level); + cp.width = width; + cp.height = height; cp.depth = depth; }; const u32 layers = params.depth; @@ -156,7 +171,11 @@ public: const u32 layer_offset = layer * mipmaps; for (std::size_t level = 0; level < mipmaps; level++) { CopyParams& cp = result[layer_offset + level]; - set_up_copy(cp, params, layer, level); + const u32 width = + std::min(params.GetMipWidth(level), in_params.GetMipWidth(level)); + const u32 height = + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level)); + set_up_copy(cp, width, height, layer, level); } } return result; @@ -164,7 +183,11 @@ public: std::vector result{mipmaps}; for (std::size_t level = 0; level < mipmaps; level++) { CopyParams& cp = result[level]; - set_up_copy(cp, params, params.GetMipDepth(level), level); + const u32 width = std::min(params.GetMipWidth(level), in_params.GetMipWidth(level)); + const u32 height = + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level)); + const u32 depth = std::min(params.GetMipDepth(level), in_params.GetMipDepth(level)); + set_up_copy(cp, width, height, depth, level); } return result; } @@ -254,7 +277,8 @@ public: 
std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { if (view_addr < gpu_addr) return {}; - if (params.target == SurfaceTarget::Texture3D || view_params.target == SurfaceTarget::Texture3D) { + if (params.target == SurfaceTarget::Texture3D || + view_params.target == SurfaceTarget::Texture3D) { return {}; } const std::size_t size = view_params.GetGuestSizeInBytes(); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index eb0d9bc10..f3b28453a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -284,7 +284,7 @@ private: const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); - std::vector bricks = current_surface->BreakDown(); + std::vector bricks = current_surface->BreakDown(params); for (auto& brick : bricks) { ImageCopy(current_surface, new_surface, brick); } @@ -370,11 +370,16 @@ private: if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; - if (current_surface->MatchesStructure(params) && + MatchStructureResult s_result = current_surface->MatchesStructure(params); + if (s_result != MatchStructureResult::None && current_surface->GetGpuAddr() == gpu_addr && (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { - return ManageStructuralMatch(current_surface, params); + if (s_result == MatchStructureResult::FullMatch) { + return ManageStructuralMatch(current_surface, params); + } else { + return RebuildMirage(current_surface, params); + } } if (current_surface->GetSizeInBytes() <= candidate_size) { return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, -- cgit v1.2.3 From d86f9cd70910d4b96ec301e7d532b11d18a290a4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 17:30:36 -0400 Subject: Change texture_cache chaching from GPUAddr to 
CacheAddr This also reverses the changes to make invalidation and flushing through the GPU address. --- src/video_core/texture_cache/surface_base.cpp | 5 +- src/video_core/texture_cache/surface_base.h | 30 ++++---- src/video_core/texture_cache/texture_cache.h | 102 +++++++++++--------------- 3 files changed, 59 insertions(+), 78 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 5273fcb44..0de0bc656 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -25,7 +25,6 @@ SurfaceBaseImpl::SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& u32 offset = 0; mipmap_offsets.resize(params.num_levels); mipmap_sizes.resize(params.num_levels); - gpu_addr_end = gpu_addr + memory_size; for (u32 i = 0; i < params.num_levels; i++) { mipmap_offsets[i] = offset; mipmap_sizes[i] = params.GetGuestMipmapSize(i); @@ -99,8 +98,10 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } -void SurfaceBaseImpl::FlushBuffer(std::vector& staging_buffer) { +void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, + std::vector& staging_buffer) { MICROPROFILE_SCOPE(GPU_Flush_Texture); + auto host_ptr = memory_manager.GetPointer(gpu_addr); if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 9c048eb88..74be3237d 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -45,40 +45,40 @@ class SurfaceBaseImpl { public: void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); - void FlushBuffer(std::vector& staging_buffer); + void FlushBuffer(Tegra::MemoryManager& 
memory_manager, std::vector& staging_buffer); GPUVAddr GetGpuAddr() const { return gpu_addr; } - GPUVAddr GetGpuAddrEnd() const { - return gpu_addr_end; - } - - bool Overlaps(const GPUVAddr start, const GPUVAddr end) const { - return (gpu_addr < end) && (gpu_addr_end > start); + bool Overlaps(const CacheAddr start, const CacheAddr end) const { + return (cache_addr < end) && (cache_addr_end > start); } // Use only when recycling a surface void SetGpuAddr(const GPUVAddr new_addr) { gpu_addr = new_addr; - gpu_addr_end = new_addr + memory_size; } VAddr GetCpuAddr() const { - return gpu_addr; + return cpu_addr; } void SetCpuAddr(const VAddr new_addr) { cpu_addr = new_addr; } - u8* GetHostPtr() const { - return host_ptr; + CacheAddr GetCacheAddr() const { + return cache_addr; + } + + CacheAddr GetCacheAddrEnd() const { + return cache_addr_end; } - void SetHostPtr(u8* new_addr) { - host_ptr = new_addr; + void SetCacheAddr(const CacheAddr new_addr) { + cache_addr = new_addr; + cache_addr_end = new_addr + memory_size; } const SurfaceParams& GetSurfaceParams() const { @@ -201,13 +201,13 @@ protected: const SurfaceParams params; GPUVAddr gpu_addr{}; - GPUVAddr gpu_addr_end{}; std::vector mipmap_sizes; std::vector mipmap_offsets; const std::size_t layer_size; const std::size_t memory_size; const std::size_t host_memory_size; - u8* host_ptr; + CacheAddr cache_addr; + CacheAddr cache_addr_end{}; VAddr cpu_addr; private: diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f3b28453a..43aaec011 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -60,12 +60,6 @@ public: } } - void InvalidateRegionEx(GPUVAddr addr, std::size_t size) { - for (const auto& surface : GetSurfacesInRegionInner(addr, size)) { - Unregister(surface); - } - } - TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { const auto 
gpu_addr{config.tic.Address()}; @@ -154,9 +148,19 @@ public: return GetSurface(gpu_addr, params, true).second; } - TSurface TryFindFramebufferSurface(const u8* host_ptr) const { - const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; - return it != registered_surfaces.end() ? *it->second.begin() : nullptr; + TSurface TryFindFramebufferSurface(const u8* host_ptr) { + const CacheAddr cache_addr = ToCacheAddr(host_ptr); + if (!cache_addr) { + return nullptr; + } + const CacheAddr page = cache_addr >> registry_page_bits; + std::list& list = registry[page]; + for (auto& s : list) { + if (s->GetCacheAddr() == cache_addr) { + return s; + } + } + return nullptr; } u64 Tick() { @@ -181,30 +185,28 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); - u8* host_ptr = memory_manager->GetPointer(gpu_addr); + const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); const std::optional cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); - if (!host_ptr || !cpu_addr) { + if (!cache_ptr || !cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); return; } - surface->SetHostPtr(host_ptr); + surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); - registered_surfaces.add({GetInterval(host_ptr, size), {surface}}); - rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); RegisterInnerCache(surface); surface->MarkAsRegistered(true); + rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } void Unregister(TSurface surface) { if (surface->IsProtected()) return; const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const void* host_ptr = surface->GetHostPtr(); + const CacheAddr cache_ptr = surface->GetCacheAddr(); const std::size_t size = surface->GetSizeInBytes(); const VAddr cpu_addr = surface->GetCpuAddr(); - registered_surfaces.erase(GetInterval(host_ptr, size)); 
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); UnregisterInnerCache(surface); surface->MarkAsRegistered(false); @@ -280,7 +282,7 @@ private: } } - std::pair RebuildMirage(TSurface current_surface, + std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); @@ -297,7 +299,7 @@ private: const SurfaceParams& params) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { - return RebuildMirage(current_surface, params); + return RebuildSurface(current_surface, params); } const bool matches_target = current_surface->MatchTarget(params.target); if (matches_target) { @@ -356,7 +358,7 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)}; + auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -378,7 +380,7 @@ private: if (s_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params); } else { - return RebuildMirage(current_surface, params); + return RebuildSurface(current_surface, params); } } if (current_surface->GetSizeInBytes() <= candidate_size) { @@ -429,58 +431,40 @@ private: } staging_buffer.resize(surface->GetHostSizeInBytes()); surface->DownloadTexture(staging_buffer); - surface->FlushBuffer(staging_buffer); + surface->FlushBuffer(*memory_manager, staging_buffer); surface->MarkAsModified(false, Tick()); } - std::vector GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { - if (size == 0) { - return {}; - } - const IntervalType interval{cache_addr, cache_addr + size}; - - std::vector surfaces; - for (auto& pair : 
boost::make_iterator_range(registered_surfaces.equal_range(interval))) { - for (auto& s : pair.second) { - if (!s || !s->IsRegistered()) { - continue; - } - surfaces.push_back(s); - } - } - return surfaces; - } - void RegisterInnerCache(TSurface& surface) { - GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; - const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - inner_cache[start].push_back(surface); + registry[start].push_back(surface); start++; } } void UnregisterInnerCache(TSurface& surface) { - GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; - const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - inner_cache[start].remove(surface); + registry[start].remove(surface); start++; } } - std::vector GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) { + std::vector GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { if (size == 0) { return {}; } - const GPUVAddr gpu_addr_end = gpu_addr + size; - GPUVAddr start = gpu_addr >> inner_cache_page_bits; - const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits; + const CacheAddr cache_addr_end = cache_addr + size; + CacheAddr start = cache_addr >> registry_page_bits; + const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; std::vector surfaces; while (start <= end) { - std::list& list = inner_cache[start]; + std::list& list = registry[start]; for (auto& s : list) { - if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) { + if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { s->MarkAsPicked(true); surfaces.push_back(s); } @@ 
-510,11 +494,6 @@ private: return {}; } - IntervalType GetInterval(const void* host_ptr, const std::size_t size) const { - const CacheAddr addr = ToCacheAddr(host_ptr); - return IntervalType::right_open(addr, addr + size); - } - struct RenderInfo { RenderTargetConfig config; TSurface target; @@ -531,11 +510,12 @@ private: u64 ticks{}; - IntervalMap registered_surfaces; - - static constexpr u64 inner_cache_page_bits{20}; - static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits}; - std::unordered_map> inner_cache; + // The internal Cache is different for the Texture Cache. It's based on buckets + // of 1MB. This fits better for the purpose of this cache as textures are normaly + // large in size. + static constexpr u64 registry_page_bits{20}; + static constexpr u64 registry_page_size{1 << registry_page_bits}; + std::unordered_map> registry; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and -- cgit v1.2.3 From 1af4414861fda5cad2549372e65ecda090caf2f8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 19:09:34 -0400 Subject: Correct Mipmaps View method in Texture Cache --- src/video_core/texture_cache/surface_base.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 74be3237d..486585c9c 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -282,8 +282,7 @@ public: return {}; } const std::size_t size = view_params.GetGuestSizeInBytes(); - const GPUVAddr relative_address = view_addr - gpu_addr; - auto layer_mipmap = GetLayerMipmap(relative_address); + auto layer_mipmap = GetLayerMipmap(view_addr); if (!layer_mipmap) { return {}; } @@ -298,7 +297,7 @@ public: vp.num_layers = 1; vp.base_level = 
mipmap; vp.num_levels = 1; - vp.target = params.target; + vp.target = view_params.target; return {GetView(vp)}; } -- cgit v1.2.3 From 03d10ea3b420c923c14a11c86b47e2f00bc30e00 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 21:28:31 -0300 Subject: copy_params: Use constructor instead of C-like initialization --- src/video_core/texture_cache/copy_params.h | 10 ++++++ src/video_core/texture_cache/surface_base.h | 53 +++++++++++----------------- src/video_core/texture_cache/texture_cache.h | 23 +++++------- 3 files changed, 39 insertions(+), 47 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h index 75c2b1f05..8cf010142 100644 --- a/src/video_core/texture_cache/copy_params.h +++ b/src/video_core/texture_cache/copy_params.h @@ -9,6 +9,16 @@ namespace VideoCommon { struct CopyParams { + CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, u32 dest_z, + u32 source_level, u32 dest_level, u32 width, u32 height, u32 depth) + : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x}, + dest_y{dest_y}, dest_z{dest_z}, source_level{source_level}, + dest_level{dest_level}, width{width}, height{height}, depth{depth} {} + + CopyParams(u32 width, u32 height, u32 depth, u32 level) + : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level}, + dest_level{level}, width{width}, height{height}, depth{depth} {} + u32 source_x; u32 source_y; u32 source_z; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 486585c9c..029cfb055 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -149,45 +149,32 @@ public: } std::vector BreakDown(const SurfaceParams& in_params) const { - auto set_up_copy = [](CopyParams& cp, const u32 width, const u32 height, const u32 depth, - const u32 level) { - 
cp.source_x = 0; - cp.source_y = 0; - cp.source_z = 0; - cp.dest_x = 0; - cp.dest_y = 0; - cp.dest_z = 0; - cp.source_level = level; - cp.dest_level = level; - cp.width = width; - cp.height = height; - cp.depth = depth; - }; - const u32 layers = params.depth; - const u32 mipmaps = params.num_levels; + std::vector result; + const u32 layers{params.depth}; + const u32 mipmaps{params.num_levels}; + if (params.is_layered) { - std::vector result{layers * mipmaps}; - for (std::size_t layer = 0; layer < layers; layer++) { - const u32 layer_offset = layer * mipmaps; - for (std::size_t level = 0; level < mipmaps; level++) { - CopyParams& cp = result[layer_offset + level]; - const u32 width = - std::min(params.GetMipWidth(level), in_params.GetMipWidth(level)); - const u32 height = - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level)); - set_up_copy(cp, width, height, layer, level); + result.reserve(static_cast(layers) * static_cast(mipmaps)); + for (u32 layer = 0; layer < layers; layer++) { + const u32 layer_offset{layer * mipmaps}; + for (u32 level = 0; level < mipmaps; level++) { + const u32 width{ + std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; + const u32 height{ + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + result.emplace_back(width, height, layer, level); } } return result; + } else { - std::vector result{mipmaps}; + result.reserve(mipmaps); for (std::size_t level = 0; level < mipmaps; level++) { - CopyParams& cp = result[level]; - const u32 width = std::min(params.GetMipWidth(level), in_params.GetMipWidth(level)); - const u32 height = - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level)); - const u32 depth = std::min(params.GetMipDepth(level), in_params.GetMipDepth(level)); - set_up_copy(cp, width, height, depth, level); + const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; + const u32 height{ + std::min(params.GetMipHeight(level), 
in_params.GetMipHeight(level))}; + const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; + result.emplace_back(width, height, depth, level); } return result; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 43aaec011..c9a648bbd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -283,7 +283,7 @@ private: } std::pair RebuildSurface(TSurface current_surface, - const SurfaceParams& params) { + const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); std::vector bricks = current_surface->BreakDown(params); @@ -323,26 +323,21 @@ private: return {}; } const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); - auto mipmap_layer = new_surface->GetLayerMipmap(surface->GetGpuAddr()); + auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { return {}; } - const u32 layer = (*mipmap_layer).first; - const u32 mipmap = (*mipmap_layer).second; + const u32 layer{mipmap_layer->first}; + const u32 mipmap{mipmap_layer->second}; if (new_surface->GetMipmapSize(mipmap) != candidate_size) { return {}; } // Now we got all the data set up - CopyParams copy_params{}; - const u32 dst_width = params.GetMipWidth(mipmap); - const u32 dst_height = params.GetMipHeight(mipmap); - copy_params.width = std::min(src_params.width, dst_width); - copy_params.height = std::min(src_params.height, dst_height); - copy_params.depth = 1; - copy_params.source_level = 0; - copy_params.dest_level = mipmap; - copy_params.source_z = 0; - copy_params.dest_z = layer; + const u32 dst_width{params.GetMipWidth(mipmap)}; + const u32 dst_height{params.GetMipHeight(mipmap)}; + const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, + std::min(src_params.width, dst_width), + std::min(src_params.height, dst_height), 1); 
ImageCopy(surface, new_surface, copy_params); } for (auto surface : overlaps) { -- cgit v1.2.3 From 2b30000a1ed1972e0701a8525182104b4544caa4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 21:48:02 -0300 Subject: surface_base: Silence truncation warnings and minor renames and reordering --- src/video_core/texture_cache/surface_base.cpp | 34 ++++++++++++++------------ src/video_core/texture_cache/surface_base.h | 35 +++++++++++++++------------ 2 files changed, 37 insertions(+), 32 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 0de0bc656..5e994cf08 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -18,17 +18,19 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; -SurfaceBaseImpl::SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& params) - : gpu_addr{gpu_vaddr}, params{params}, mipmap_sizes{params.num_levels}, - mipmap_offsets{params.num_levels}, layer_size{params.GetGuestLayerSize()}, - memory_size{params.GetGuestSizeInBytes()}, host_memory_size{params.GetHostSizeInBytes()} { - u32 offset = 0; - mipmap_offsets.resize(params.num_levels); - mipmap_sizes.resize(params.num_levels); - for (u32 i = 0; i < params.num_levels; i++) { - mipmap_offsets[i] = offset; - mipmap_sizes[i] = params.GetGuestMipmapSize(i); - offset += mipmap_sizes[i]; +SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) + : params{params}, gpu_addr{gpu_addr}, layer_size{params.GetGuestLayerSize()}, + guest_memory_size{params.GetGuestSizeInBytes()}, host_memory_size{ + params.GetHostSizeInBytes()} { + mipmap_offsets.reserve(params.num_levels); + mipmap_sizes.reserve(params.num_levels); + + std::size_t offset = 0; + for (u32 level = 0; level < 
params.num_levels; ++level) { + const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; + mipmap_sizes.push_back(mipmap_size); + mipmap_offsets.push_back(offset); + offset += mipmap_size; } } @@ -44,7 +46,7 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf std::size_t host_offset{0}; const std::size_t guest_stride = layer_size; const std::size_t host_stride = params.GetHostLayerSize(level); - for (u32 layer = 0; layer < params.depth; layer++) { + for (u32 layer = 0; layer < params.depth; ++layer) { MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1, params.tile_width_spacing, buffer + host_offset, memory + guest_offset); guest_offset += guest_stride; @@ -60,12 +62,12 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer) { MICROPROFILE_SCOPE(GPU_Load_Texture); - auto host_ptr = memory_manager.GetPointer(gpu_addr); + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); for (u32 level = 0; level < params.num_levels; ++level) { - const u32 host_offset = params.GetHostMipmapLevelOffset(level); + const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, staging_buffer.data() + host_offset, level); } @@ -91,7 +93,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } for (u32 level = 0; level < params.num_levels; ++level) { - const u32 host_offset = params.GetHostMipmapLevelOffset(level); + const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; ConvertFromGuestToHost(staging_buffer.data() + host_offset, params.pixel_format, params.GetMipWidth(level), params.GetMipHeight(level), 
params.GetMipDepth(level), true, true); @@ -105,7 +107,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { - const u32 host_offset = params.GetHostMipmapLevelOffset(level); + const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, staging_buffer.data() + host_offset, level); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 029cfb055..7cc122158 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -78,7 +78,7 @@ public: void SetCacheAddr(const CacheAddr new_addr) { cache_addr = new_addr; - cache_addr_end = new_addr + memory_size; + cache_addr_end = new_addr + guest_memory_size; } const SurfaceParams& GetSurfaceParams() const { @@ -86,7 +86,7 @@ public: } std::size_t GetSizeInBytes() const { - return memory_size; + return guest_memory_size; } std::size_t GetHostSizeInBytes() const { @@ -135,17 +135,19 @@ public: } std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const { - if (candidate_gpu_addr < gpu_addr) + if (candidate_gpu_addr < gpu_addr) { return {}; - const GPUVAddr relative_address = candidate_gpu_addr - gpu_addr; - const u32 layer = relative_address / layer_size; + } + const auto relative_address{static_cast(candidate_gpu_addr - gpu_addr)}; + const auto layer{static_cast(relative_address / layer_size)}; const GPUVAddr mipmap_address = relative_address - layer_size * layer; const auto mipmap_it = binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); - if (mipmap_it != mipmap_offsets.end()) { - return {{layer, std::distance(mipmap_offsets.begin(), mipmap_it)}}; + if (mipmap_it == mipmap_offsets.end()) { + return {}; } - return {}; + const 
auto level{static_cast(std::distance(mipmap_offsets.begin(), mipmap_it))}; + return std::make_pair(layer, level); } std::vector BreakDown(const SurfaceParams& in_params) const { @@ -169,7 +171,7 @@ public: } else { result.reserve(mipmaps); - for (std::size_t level = 0; level < mipmaps; level++) { + for (u32 level = 0; level < mipmaps; level++) { const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; const u32 height{ std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; @@ -181,21 +183,22 @@ public: } protected: - explicit SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& params); + explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params); ~SurfaceBaseImpl() = default; virtual void DecorateSurfaceName() = 0; const SurfaceParams params; - GPUVAddr gpu_addr{}; - std::vector mipmap_sizes; - std::vector mipmap_offsets; const std::size_t layer_size; - const std::size_t memory_size; + const std::size_t guest_memory_size; const std::size_t host_memory_size; - CacheAddr cache_addr; + GPUVAddr gpu_addr{}; + CacheAddr cache_addr{}; CacheAddr cache_addr_end{}; - VAddr cpu_addr; + VAddr cpu_addr{}; + + std::vector mipmap_sizes; + std::vector mipmap_offsets; private: void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, -- cgit v1.2.3 From 16e8625a301b1f43ecebe459a40bf33f89322032 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 21:55:55 -0300 Subject: surface_base: Split BreakDown into layered and non-layered variants --- src/video_core/texture_cache/surface_base.h | 93 +++++++++++++++-------------- 1 file changed, 48 insertions(+), 45 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 7cc122158..0cfb835d9 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -106,32 +106,32 @@ 
public: } bool MatchesTopology(const SurfaceParams& rhs) const { - const u32 src_bpp = params.GetBytesPerPixel(); - const u32 dst_bpp = rhs.GetBytesPerPixel(); + const u32 src_bpp{params.GetBytesPerPixel()}; + const u32 dst_bpp{rhs.GetBytesPerPixel()}; return std::tie(src_bpp, params.is_tiled) == std::tie(dst_bpp, rhs.is_tiled); } MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { - if (params.is_tiled) { - if (std::tie(params.height, params.depth, params.block_width, params.block_height, - params.block_depth, params.tile_width_spacing) == - std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing)) { - if (params.width == rhs.width) { - return MatchStructureResult::FullMatch; - } - if (params.GetBlockAlignedWidth() == rhs.GetBlockAlignedWidth()) { - return MatchStructureResult::SemiMatch; - } - } - return MatchStructureResult::None; - } else { + if (!params.is_tiled) { if (std::tie(params.width, params.height, params.pitch) == std::tie(rhs.width, rhs.height, rhs.pitch)) { return MatchStructureResult::FullMatch; } return MatchStructureResult::None; } + // Tiled surface + if (std::tie(params.height, params.depth, params.block_width, params.block_height, + params.block_depth, params.tile_width_spacing) == + std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, + rhs.tile_width_spacing)) { + if (params.width == rhs.width) { + return MatchStructureResult::FullMatch; + } + if (params.GetBlockAlignedWidth() == rhs.GetBlockAlignedWidth()) { + return MatchStructureResult::SemiMatch; + } + } + return MatchStructureResult::None; } std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const { @@ -151,35 +151,7 @@ public: } std::vector BreakDown(const SurfaceParams& in_params) const { - std::vector result; - const u32 layers{params.depth}; - const u32 mipmaps{params.num_levels}; - - if (params.is_layered) { - result.reserve(static_cast(layers) * 
static_cast(mipmaps)); - for (u32 layer = 0; layer < layers; layer++) { - const u32 layer_offset{layer * mipmaps}; - for (u32 level = 0; level < mipmaps; level++) { - const u32 width{ - std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; - const u32 height{ - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; - result.emplace_back(width, height, layer, level); - } - } - return result; - - } else { - result.reserve(mipmaps); - for (u32 level = 0; level < mipmaps; level++) { - const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; - const u32 height{ - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; - const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; - result.emplace_back(width, height, depth, level); - } - return result; - } + return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params); } protected: @@ -203,6 +175,37 @@ protected: private: void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, u32 level); + + std::vector BreakDownLayered(const SurfaceParams& in_params) const { + const u32 layers{params.depth}; + const u32 mipmaps{params.num_levels}; + std::vector result; + result.reserve(static_cast(layers) * static_cast(mipmaps)); + + for (u32 layer = 0; layer < layers; layer++) { + for (u32 level = 0; level < mipmaps; level++) { + const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; + const u32 height{ + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + result.emplace_back(width, height, layer, level); + } + } + return result; + } + + std::vector BreakDownNonLayered(const SurfaceParams& in_params) const { + const u32 mipmaps{params.num_levels}; + std::vector result; + result.reserve(mipmaps); + + for (u32 level = 0; level < mipmaps; level++) { + const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; 
+ const u32 height{std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; + result.emplace_back(width, height, depth, level); + } + return result; + } }; template -- cgit v1.2.3 From 549fd18ac44c6bcefdf6584484d775f0129e3fe3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 22:03:33 -0300 Subject: surface_view: Add constructor for ViewParams --- src/video_core/texture_cache/surface_base.h | 39 +++++++++-------------------- src/video_core/texture_cache/surface_view.h | 8 +++++- 2 files changed, 19 insertions(+), 28 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 0cfb835d9..f469ab498 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -253,45 +253,30 @@ public: } TView EmplaceOverview(const SurfaceParams& overview_params) { - ViewParams vp{}; - vp.base_level = 0; - vp.num_levels = params.num_levels; - vp.target = overview_params.target; - if (params.is_layered && !overview_params.is_layered) { - vp.base_layer = 0; - vp.num_layers = 1; - } else { - vp.base_layer = 0; - vp.num_layers = params.depth; - } - return GetView(vp); + const u32 num_layers{params.is_layered && !overview_params.is_layered ? 
1 : params.depth}; + const ViewParams view_params(overview_params.target, 0, num_layers, 0, params.num_levels); + return GetView(view_params); } std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { - if (view_addr < gpu_addr) - return {}; - if (params.target == SurfaceTarget::Texture3D || + if (view_addr < gpu_addr || params.target == SurfaceTarget::Texture3D || view_params.target == SurfaceTarget::Texture3D) { return {}; } - const std::size_t size = view_params.GetGuestSizeInBytes(); - auto layer_mipmap = GetLayerMipmap(view_addr); + const std::size_t size{view_params.GetGuestSizeInBytes()}; + const auto layer_mipmap{GetLayerMipmap(view_addr)}; if (!layer_mipmap) { return {}; } - const u32 layer = (*layer_mipmap).first; - const u32 mipmap = (*layer_mipmap).second; + const u32 layer{layer_mipmap->first}; + const u32 mipmap{layer_mipmap->second}; if (GetMipmapSize(mipmap) != size) { - // TODO: the view may cover many mimaps, this case can still go on + // TODO: The view may cover many mimaps, this case can still go on. + // This edge-case can be safely be ignored since it will just result in worse + // performance. 
return {}; } - ViewParams vp{}; - vp.base_layer = layer; - vp.num_layers = 1; - vp.base_level = mipmap; - vp.num_levels = 1; - vp.target = view_params.target; - return {GetView(vp)}; + return GetView(ViewParams(params.target, layer, 1, mipmap, 1)); } TView GetMainView() const { diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index c122800a6..1ef4509ce 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -13,15 +13,21 @@ namespace VideoCommon { struct ViewParams { + ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, u32 num_layers, + u32 base_level, u32 num_levels) + : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level}, + num_levels{num_levels} {} + std::size_t Hash() const; bool operator==(const ViewParams& rhs) const; + VideoCore::Surface::SurfaceTarget target{}; u32 base_layer{}; u32 num_layers{}; u32 base_level{}; u32 num_levels{}; - VideoCore::Surface::SurfaceTarget target; + bool IsLayered() const { switch (target) { case VideoCore::Surface::SurfaceTarget::Texture1DArray: -- cgit v1.2.3 From 324e470879e63423844a687f7d675a0536006f07 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 23:13:05 -0400 Subject: Texture Cache: Implement Blitting and Fermi Copies --- src/video_core/texture_cache/texture_cache.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c9a648bbd..bb5a50ab9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -15,6 +15,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/math_util.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ 
-142,10 +143,11 @@ public: } } - TView GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); - const GPUVAddr gpu_addr = config.Address(); - return GetSurface(gpu_addr, params, true).second; + void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, + const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, + const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) { + ImageBlit(GetFermiSurface(src_config), GetFermiSurface(dst_config), src_rect, dst_rect); } TSurface TryFindFramebufferSurface(const u8* host_ptr) { @@ -183,6 +185,9 @@ protected: virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, const CopyParams& copy_params) = 0; + virtual void ImageBlit(TSurface src, TSurface dst, const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) = 0; + void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); @@ -223,6 +228,12 @@ protected: return new_surface; } + TSurface GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); + const GPUVAddr gpu_addr = config.Address(); + return GetSurface(gpu_addr, params, true).first; + } + Core::System& system; private: -- cgit v1.2.3 From e0002599accc783be1bda5853df377c84ee6219a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 8 May 2019 03:51:54 -0300 Subject: surface_base: Add parenthesis to EmplaceOverview's predicate --- src/video_core/texture_cache/surface_base.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index f469ab498..c11998249 100644 --- a/src/video_core/texture_cache/surface_base.h +++ 
b/src/video_core/texture_cache/surface_base.h @@ -253,9 +253,8 @@ public: } TView EmplaceOverview(const SurfaceParams& overview_params) { - const u32 num_layers{params.is_layered && !overview_params.is_layered ? 1 : params.depth}; - const ViewParams view_params(overview_params.target, 0, num_layers, 0, params.num_levels); - return GetView(view_params); + const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth}; + return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { -- cgit v1.2.3 From de0b1cb2b2199bd8efff78938d385fa74652cdfb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 07:09:02 -0400 Subject: Fixes to mipmap's process and reconstruct process --- src/video_core/texture_cache/surface_base.h | 4 ++-- src/video_core/texture_cache/texture_cache.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index c11998249..017ee999e 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -262,20 +262,20 @@ public: view_params.target == SurfaceTarget::Texture3D) { return {}; } - const std::size_t size{view_params.GetGuestSizeInBytes()}; const auto layer_mipmap{GetLayerMipmap(view_addr)}; if (!layer_mipmap) { return {}; } const u32 layer{layer_mipmap->first}; const u32 mipmap{layer_mipmap->second}; + const std::size_t size{view_params.GetGuestSizeInBytes()}; if (GetMipmapSize(mipmap) != size) { // TODO: The view may cover many mimaps, this case can still go on. // This edge-case can be safely be ignored since it will just result in worse // performance. 
return {}; } - return GetView(ViewParams(params.target, layer, 1, mipmap, 1)); + return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1)); } TView GetMainView() const { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index bb5a50ab9..554b9a228 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -323,7 +323,7 @@ private: const SurfaceParams& params, const GPUVAddr gpu_addr, const u8* host_ptr) { - if (!params.is_layered || params.target == SurfaceTarget::Texture3D) { + if (params.target == SurfaceTarget::Texture3D) { return {}; } TSurface new_surface = GetUncachedSurface(gpu_addr, params); -- cgit v1.2.3 From ba677ccb5a8ae0c889751fcdd40b0c9e818ad992 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 10:32:30 -0400 Subject: texture_cache: Implement guest flushing --- src/video_core/texture_cache/surface_base.cpp | 19 +++++++++++-------- src/video_core/texture_cache/texture_cache.h | 14 ++++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 5e994cf08..dc5013240 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -63,6 +63,9 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer) { MICROPROFILE_SCOPE(GPU_Load_Texture); const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + if (!host_ptr) { + return; + } if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); @@ -103,7 +106,10 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, std::vector& 
staging_buffer) { MICROPROFILE_SCOPE(GPU_Flush_Texture); - auto host_ptr = memory_manager.GetPointer(gpu_addr); + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + if (!host_ptr) { + return; + } if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { @@ -112,25 +118,22 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, staging_buffer.data() + host_offset, level); } } else { - UNIMPLEMENTED(); - /* ASSERT(params.target == SurfaceTarget::Texture2D); ASSERT(params.num_levels == 1); - const u32 bpp{params.GetFormatBpp() / 8}; + const u32 bpp{params.GetBytesPerPixel()}; const u32 copy_size{params.width * bpp}; if (params.pitch == copy_size) { - std::memcpy(host_ptr, staging_buffer.data(), memory_size); + std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); } else { u8* start{host_ptr}; const u8* read_to{staging_buffer.data()}; - for (u32 h = params.GetHeight(); h > 0; --h) { + for (u32 h = params.height; h > 0; --h) { std::memcpy(start, read_to, copy_size); - start += params.GetPitch(); + start += params.pitch; read_to += copy_size; } } - */ } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 554b9a228..422bf3e58 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -61,6 +61,20 @@ public: } } + void FlushRegion(CacheAddr addr, std::size_t size) { + auto surfaces = GetSurfacesInRegion(addr, size); + if (surfaces.empty()) { + return; + } + std::sort(surfaces.begin(), surfaces.end(), + [](const TSurface& a, const TSurface& b) -> bool { + return a->GetModificationTick() < b->GetModificationTick(); + }); + for (const auto& surface : surfaces) { + FlushSurface(surface); + } + } + TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { const 
auto gpu_addr{config.tic.Address()}; -- cgit v1.2.3 From 4e2071b6d9b414fa0152deb5e9d55674d636afe4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 17:45:59 -0400 Subject: texture_cache: Correct premature texceptions Due to our current infrastructure, it is possible for a mipmap to be set as a render target before a texception of that mipmap's superset is set afterwards. This is problematic as we rely on texture views to set up texceptions and protecting render targets for 3D texture rendering. One simple solution is to configure framebuffers after texture setup but this brings other problems. This solution forces a reconfiguration of the framebuffers after such an event happens. --- src/video_core/texture_cache/surface_base.h | 17 ++++++++++++++--- src/video_core/texture_cache/texture_cache.h | 26 ++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 7 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 017ee999e..179e80ddb 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -55,6 +55,11 @@ public: return (cache_addr < end) && (cache_addr_end > start); } + bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { + const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; + return (gpu_addr <= other_start && other_end <= gpu_addr_end); + } + // Use only when recycling a surface void SetGpuAddr(const GPUVAddr new_addr) { gpu_addr = new_addr; @@ -105,6 +110,12 @@ public: return params.target == target; } + bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { + return std::tie(gpu_addr, params.target, params.num_levels) == + std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && + params.target == SurfaceTarget::Texture2D && params.num_levels == 1; + } + bool MatchesTopology(const SurfaceParams& rhs) const { 
const u32 src_bpp{params.GetBytesPerPixel()}; const u32 dst_bpp{rhs.GetBytesPerPixel()}; @@ -121,9 +132,9 @@ public: } // Tiled surface if (std::tie(params.height, params.depth, params.block_width, params.block_height, - params.block_depth, params.tile_width_spacing) == + params.block_depth, params.tile_width_spacing, params.num_levels) == std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing)) { + rhs.tile_width_spacing, rhs.num_levels)) { if (params.width == rhs.width) { return MatchStructureResult::FullMatch; } @@ -259,7 +270,7 @@ public: std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { if (view_addr < gpu_addr || params.target == SurfaceTarget::Texture3D || - view_params.target == SurfaceTarget::Texture3D) { + params.num_levels == 1 || view_params.target == SurfaceTarget::Texture3D) { return {}; } const auto layer_mipmap{GetLayerMipmap(view_addr)}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 422bf3e58..96d108147 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -120,6 +120,10 @@ public: return {}; } + if (regs.color_mask[index].raw == 0) { + return {}; + } + auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) @@ -183,6 +187,12 @@ public: return ++ticks; } + bool ConsumeReconfigurationFlag() { + const bool result = force_reconfiguration; + force_reconfiguration = false; + return result; + } + protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { @@ -219,9 +229,10 @@ protected: rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } - void Unregister(TSurface surface) { - if (surface->IsProtected()) + void Unregister(TSurface surface, const bool force_unregister = false) { 
+ if (surface->IsProtected() && !force_unregister) { return; + } const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = surface->GetCacheAddr(); const std::size_t size = surface->GetSizeInBytes(); @@ -365,8 +376,10 @@ private: std::min(src_params.height, dst_height), 1); ImageCopy(surface, new_surface, copy_params); } + force_reconfiguration = false; for (auto surface : overlaps) { - Unregister(surface); + force_reconfiguration |= surface->IsProtected(); + Unregister(surface, true); } Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; @@ -379,6 +392,7 @@ private: const auto cache_addr{ToCacheAddr(host_ptr)}; const std::size_t candidate_size = params.GetGuestSizeInBytes(); auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -403,7 +417,7 @@ private: return RebuildSurface(current_surface, params); } } - if (current_surface->GetSizeInBytes() <= candidate_size) { + if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); } @@ -530,6 +544,10 @@ private: u64 ticks{}; + // Sometimes Setup Textures can hit a surface that's on the render target, when this happens + // we force a reconfiguration of the frame buffer after setup. + bool force_reconfiguration; + // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly // large in size. -- cgit v1.2.3 From b347543e8341ae323ea232d47df2c144fe21c739 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 18:27:29 -0400 Subject: Reduce amount of size calculations. 
--- src/video_core/texture_cache/surface_base.cpp | 22 +++++++---- src/video_core/texture_cache/surface_base.h | 28 +++++--------- src/video_core/texture_cache/surface_params.cpp | 31 +-------------- src/video_core/texture_cache/surface_params.h | 50 +++++++++++++++++++------ src/video_core/texture_cache/texture_cache.h | 40 ++++++++++---------- 5 files changed, 85 insertions(+), 86 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index dc5013240..36ca72b4a 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -19,19 +19,27 @@ using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) - : params{params}, gpu_addr{gpu_addr}, layer_size{params.GetGuestLayerSize()}, - guest_memory_size{params.GetGuestSizeInBytes()}, host_memory_size{ - params.GetHostSizeInBytes()} { - mipmap_offsets.reserve(params.num_levels); - mipmap_sizes.reserve(params.num_levels); + : params{params}, mipmap_sizes(params.num_levels), + mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ + params.GetHostSizeInBytes()} { std::size_t offset = 0; for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; - mipmap_sizes.push_back(mipmap_size); - mipmap_offsets.push_back(offset); + mipmap_sizes[level] = mipmap_size; + mipmap_offsets[level] = offset; offset += mipmap_size; } + layer_size = offset; + if (params.is_layered) { + if (params.is_tiled) { + layer_size = + SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); + } + guest_memory_size = layer_size * params.depth; + } else { + guest_memory_size = layer_size; + } } void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, 
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 179e80ddb..095deb602 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -9,6 +9,7 @@ #include #include "common/assert.h" +#include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/morton.h" @@ -16,16 +17,6 @@ #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" -template > -ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { - // Note: BOTH type T and the type after ForwardIt is dereferenced - // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. - // This is stricter than lower_bound requirement (see above) - - first = std::lower_bound(first, last, value, comp); - return first != last && !comp(value, *first) ? first : last; -} - namespace Tegra { class MemoryManager; } @@ -153,7 +144,7 @@ public: const auto layer{static_cast(relative_address / layer_size)}; const GPUVAddr mipmap_address = relative_address - layer_size * layer; const auto mipmap_it = - binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); if (mipmap_it == mipmap_offsets.end()) { return {}; } @@ -172,8 +163,8 @@ protected: virtual void DecorateSurfaceName() = 0; const SurfaceParams params; - const std::size_t layer_size; - const std::size_t guest_memory_size; + std::size_t layer_size; + std::size_t guest_memory_size; const std::size_t host_memory_size; GPUVAddr gpu_addr{}; CacheAddr cache_addr{}; @@ -268,9 +259,11 @@ public: return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } - std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { - if (view_addr < gpu_addr || params.target == 
SurfaceTarget::Texture3D || - params.num_levels == 1 || view_params.target == SurfaceTarget::Texture3D) { + std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, + const std::size_t candidate_size) { + if (params.target == SurfaceTarget::Texture3D || + (params.num_levels == 1 && !params.is_layered) || + view_params.target == SurfaceTarget::Texture3D) { return {}; } const auto layer_mipmap{GetLayerMipmap(view_addr)}; @@ -279,8 +272,7 @@ public: } const u32 layer{layer_mipmap->first}; const u32 mipmap{layer_mipmap->second}; - const std::size_t size{view_params.GetGuestSizeInBytes()}; - if (GetMipmapSize(mipmap) != size) { + if (GetMipmapSize(mipmap) != candidate_size) { // TODO: The view may cover many mimaps, this case can still go on. // This edge-case can be safely be ignored since it will just result in worse // performance. diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index d9052152c..b537b26e2 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -4,13 +4,12 @@ #include -#include "common/cityhash.h" #include "common/alignment.h" +#include "common/cityhash.h" #include "core/core.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/surface.h" #include "video_core/texture_cache/surface_params.h" -#include "video_core/textures/decoders.h" namespace VideoCommon { @@ -169,18 +168,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( return params; } -u32 SurfaceParams::GetMipWidth(u32 level) const { - return std::max(1U, width >> level); -} - -u32 SurfaceParams::GetMipHeight(u32 level) const { - return std::max(1U, height >> level); -} - -u32 SurfaceParams::GetMipDepth(u32 level) const { - return is_layered ? 
depth : std::max(1U, depth >> level); -} - bool SurfaceParams::IsLayered() const { switch (target) { case SurfaceTarget::Texture1DArray: @@ -275,22 +262,6 @@ std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { return GetInnerMipmapMemorySize(level, true, false); } -u32 SurfaceParams::GetDefaultBlockWidth() const { - return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); -} - -u32 SurfaceParams::GetDefaultBlockHeight() const { - return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); -} - -u32 SurfaceParams::GetBitsPerPixel() const { - return VideoCore::Surface::GetFormatBpp(pixel_format); -} - -u32 SurfaceParams::GetBytesPerPixel() const { - return VideoCore::Surface::GetBytesPerPixel(pixel_format); -} - bool SurfaceParams::IsPixelFormatZeta() const { return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index ec8efa210..e0ec1be0e 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -10,8 +10,9 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/surface.h" #include "video_core/shader/shader_ir.h" +#include "video_core/surface.h" +#include "video_core/textures/decoders.h" namespace VideoCommon { @@ -50,10 +51,17 @@ public: std::size_t GetHostSizeInBytes() const { std::size_t host_size_in_bytes; if (IsPixelFormatASTC(pixel_format)) { + constexpr std::size_t rgb8_bpp = 4ULL; // ASTC is uncompressed in software, in emulated as RGBA8 - host_size_in_bytes = static_cast(Common::AlignUp(width, GetDefaultBlockWidth())) * - static_cast(Common::AlignUp(height, GetDefaultBlockHeight())) * - static_cast(depth) * 4ULL; + host_size_in_bytes = 0; + for (std::size_t level = 0; level < num_levels; level++) { + 
const std::size_t width = + Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); + const std::size_t height = + Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); + const std::size_t depth = is_layered ? depth : GetMipDepth(level); + host_size_in_bytes += width * height * depth * rgb8_bpp; + } } else { host_size_in_bytes = GetInnerMemorySize(true, false, false); } @@ -65,13 +73,19 @@ public: } /// Returns the width of a given mipmap level. - u32 GetMipWidth(u32 level) const; + u32 GetMipWidth(u32 level) const { + return std::max(1U, width >> level); + } /// Returns the height of a given mipmap level. - u32 GetMipHeight(u32 level) const; + u32 GetMipHeight(u32 level) const { + return std::max(1U, height >> level); + } /// Returns the depth of a given mipmap level. - u32 GetMipDepth(u32 level) const; + u32 GetMipDepth(u32 level) const { + return is_layered ? depth : std::max(1U, depth >> level); + } /// Returns the block height of a given mipmap level. u32 GetMipBlockHeight(u32 level) const; @@ -79,6 +93,12 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; + // Helper used for out of class size calculations + static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, + const u32 block_depth) { + return Common::AlignUp(out_size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + } + /// Returns the offset in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapLevelOffset(u32 level) const; @@ -98,16 +118,24 @@ public: std::size_t GetHostLayerSize(u32 level) const; /// Returns the default block width. - u32 GetDefaultBlockWidth() const; + u32 GetDefaultBlockWidth() const { + return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); + } /// Returns the default block height. 
- u32 GetDefaultBlockHeight() const; + u32 GetDefaultBlockHeight() const { + return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); + } /// Returns the bits per pixel. - u32 GetBitsPerPixel() const; + u32 GetBitsPerPixel() const { + return VideoCore::Surface::GetFormatBpp(pixel_format); + } /// Returns the bytes per pixel. - u32 GetBytesPerPixel() const; + u32 GetBytesPerPixel() const { + return VideoCore::Surface::GetBytesPerPixel(pixel_format); + } /// Returns true if the pixel format is a depth and/or stencil format. bool IsPixelFormatZeta() const; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 96d108147..fbfd1ff0b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -120,10 +120,6 @@ public: return {}; } - if (regs.color_mask[index].raw == 0) { - return {}; - } - auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) @@ -165,7 +161,9 @@ public: const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) { - ImageBlit(GetFermiSurface(src_config), GetFermiSurface(dst_config), src_rect, dst_rect); + TSurface dst_surface = GetFermiSurface(dst_config); + ImageBlit(GetFermiSurface(src_config), dst_surface, src_rect, dst_rect); + dst_surface->MarkAsModified(true, Tick()); } TSurface TryFindFramebufferSurface(const u8* host_ptr) { @@ -270,10 +268,6 @@ private: RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { - // Untopological decision - if (untopological) { - return RecycleStrategy::Ignore; - } // 3D Textures decision if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -284,12 +278,16 @@ private: return RecycleStrategy::Flush; } } + // 
Untopological decision + if (untopological) { + return RecycleStrategy::Ignore; + } return RecycleStrategy::Ignore; } std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, - const u8* host_ptr, const bool preserve_contents, + const bool preserve_contents, const bool untopological) { for (auto surface : overlaps) { Unregister(surface); @@ -328,6 +326,7 @@ private: } Unregister(current_surface); Register(new_surface); + new_surface->MarkAsModified(current_surface->IsModified(), Tick()); return {new_surface, new_surface->GetMainView()}; } @@ -351,6 +350,7 @@ private: if (params.target == SurfaceTarget::Texture3D) { return {}; } + bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); for (auto surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); @@ -358,7 +358,7 @@ private: // We send this cases to recycle as they are more complex to handle return {}; } - const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); + const std::size_t candidate_size = surface->GetSizeInBytes(); auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { return {}; @@ -368,6 +368,7 @@ private: if (new_surface->GetMipmapSize(mipmap) != candidate_size) { return {}; } + modified |= surface->IsModified(); // Now we got all the data set up const u32 dst_width{params.GetMipWidth(mipmap)}; const u32 dst_height{params.GetMipHeight(mipmap)}; @@ -381,6 +382,7 @@ private: force_reconfiguration |= surface->IsProtected(); Unregister(surface, true); } + new_surface->MarkAsModified(modified, Tick()); Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; } @@ -399,8 +401,7 @@ private: for (auto surface : overlaps) { if (!surface->MatchesTopology(params)) { - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - true); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); } } @@ 
-418,27 +419,26 @@ private: } } if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } - std::optional view = current_surface->EmplaceView(params, gpu_addr); + std::optional view = + current_surface->EmplaceView(params, gpu_addr, candidate_size); if (view.has_value()) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } return {current_surface, *view}; } - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } else { std::optional> view = ReconstructSurface(overlaps, params, gpu_addr, host_ptr); if (view.has_value()) { return *view; } - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } } -- cgit v1.2.3 From 28d7c2f5a5089051410d37a03d5a4a42e4230842 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 01:10:16 -0300 Subject: texture_cache: Change internal cache from lists to vectors --- src/video_core/texture_cache/texture_cache.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fbfd1ff0b..1c2b63dae 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,11 +4,11 @@ #pragma once -#include #include #include #include #include +#include #include #include @@ 
-172,7 +172,7 @@ public: return nullptr; } const CacheAddr page = cache_addr >> registry_page_bits; - std::list& list = registry[page]; + std::vector& list = registry[page]; for (auto& s : list) { if (s->GetCacheAddr() == cache_addr) { return s; @@ -482,7 +482,8 @@ private: CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - registry[start].remove(surface); + auto& reg{registry[start]}; + reg.erase(std::find(reg.begin(), reg.end(), surface)); start++; } } @@ -496,7 +497,7 @@ private: const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; std::vector surfaces; while (start <= end) { - std::list& list = registry[start]; + std::vector& list = registry[start]; for (auto& s : list) { if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { s->MarkAsPicked(true); @@ -553,12 +554,12 @@ private: // large in size. static constexpr u64 registry_page_bits{20}; static constexpr u64 registry_page_size{1 << registry_page_bits}; - std::unordered_map> registry; + std::unordered_map> registry; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. - std::unordered_map> surface_reserve; + std::unordered_map> surface_reserve; std::array render_targets; DepthBufferInfo depth_buffer; -- cgit v1.2.3 From 345e73f2feb0701e3c3099d002a1c21fb524eae4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 04:17:48 -0300 Subject: video_core: Use un-shifted block sizes to avoid integer divisions Instead of storing all block width, height and depths in their shifted form: block_width = 1U << block_shift; Store them like they are provided by the emulated hardware (their block_shift form). 
This way we can avoid doing the costly Common::AlignUp operation to align texture sizes and drop CPU integer divisions with bitwise logic (defined in Common::AlignBits). --- src/video_core/texture_cache/surface_base.cpp | 3 +- src/video_core/texture_cache/surface_params.cpp | 39 +++++++++++++------------ src/video_core/texture_cache/surface_params.h | 7 +++-- src/video_core/texture_cache/texture_cache.h | 3 ++ 4 files changed, 28 insertions(+), 24 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 36ca72b4a..510d1aef5 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -22,7 +22,6 @@ SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) : params{params}, mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ params.GetHostSizeInBytes()} { - std::size_t offset = 0; for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; @@ -75,7 +74,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, return; } if (params.is_tiled) { - ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", + ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index b537b26e2..3a47f404d 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -96,9 +96,9 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( SurfaceParams params; params.is_tiled 
= type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; params.srgb_conversion = false; - params.block_width = 1 << std::min(block_width, 5U); - params.block_height = 1 << std::min(block_height, 5U); - params.block_depth = 1 << std::min(block_depth, 5U); + params.block_width = std::min(block_width, 5U); + params.block_height = std::min(block_height, 5U); + params.block_depth = std::min(block_depth, 5U); params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromDepthFormat(format); params.component_type = ComponentTypeFromDepthFormat(format); @@ -120,9 +120,9 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.block_width = 1 << config.memory_layout.block_width; - params.block_height = 1 << config.memory_layout.block_height; - params.block_depth = 1 << config.memory_layout.block_depth; + params.block_width = config.memory_layout.block_width; + params.block_height = config.memory_layout.block_height; + params.block_depth = config.memory_layout.block_depth; params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.component_type = ComponentTypeFromRenderTarget(config.format); @@ -149,9 +149,9 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.is_tiled = !config.linear; params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, - params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, - params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, + params.block_width = params.is_tiled ? 
std::min(config.BlockWidth(), 5U) : 0, + params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0, + params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0, params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.component_type = ComponentTypeFromRenderTarget(config.format); @@ -190,9 +190,9 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { const u32 height{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; - u32 block_height = 16; - while (block_height > 1 && blocks_in_y <= block_height * 4) { - block_height >>= 1; + u32 block_height = 4; + while (block_height > 0 && blocks_in_y <= (1U << block_height) * 4) { + --block_height; } return block_height; } @@ -202,17 +202,17 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { return this->block_depth; } if (is_layered) { - return 1; + return 0; } const u32 depth{GetMipDepth(level)}; - u32 block_depth = 32; - while (block_depth > 1 && depth * 2 <= block_depth) { - block_depth >>= 1; + u32 block_depth = 5; + while (block_depth > 0 && depth * 2 <= (1U << block_depth)) { + --block_depth; } - if (block_depth == 32 && GetMipBlockHeight(level) >= 4) { - return 16; + if (block_depth == 5 && GetMipBlockHeight(level) >= 2) { + return 4; } return block_depth; @@ -252,7 +252,8 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); } if (is_tiled && is_layered) { - return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + return Common::AlignBits(size, + Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); } return size; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index e0ec1be0e..7c48782c7 100644 --- 
a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -54,12 +54,12 @@ public: constexpr std::size_t rgb8_bpp = 4ULL; // ASTC is uncompressed in software, in emulated as RGBA8 host_size_in_bytes = 0; - for (std::size_t level = 0; level < num_levels; level++) { + for (u32 level = 0; level < num_levels; ++level) { const std::size_t width = Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); const std::size_t height = Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); - const std::size_t depth = is_layered ? depth : GetMipDepth(level); + const std::size_t depth = is_layered ? this->depth : GetMipDepth(level); host_size_in_bytes += width * height * depth * rgb8_bpp; } } else { @@ -96,7 +96,8 @@ public: // Helper used for out of class size calculations static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, const u32 block_depth) { - return Common::AlignUp(out_size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + return Common::AlignBits(out_size, + Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); } /// Returns the offset in bytes in guest memory of a given mipmap level. 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1c2b63dae..f35d0c88f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -81,6 +81,9 @@ public: if (!gpu_addr) { return {}; } + if (gpu_addr == 0x1b7ec0000) { + // __debugbreak(); + } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; return GetSurface(gpu_addr, params, true).second; } -- cgit v1.2.3 From a4a58be2d46e95df4cead2916b6efbd658a0deaa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 17:59:18 -0400 Subject: texture_cache: Implement L1_Inner_cache --- src/video_core/texture_cache/texture_cache.h | 43 +++++++++++++++++++--------- 1 file changed, 30 insertions(+), 13 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f35d0c88f..ad0fbd7ce 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -395,6 +395,26 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + + if (l1_cache.count(cache_addr) > 0) { + TSurface current_surface = l1_cache[cache_addr]; + if (!current_surface->MatchesTopology(params)) { + std::vector overlaps{current_surface}; + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + } + MatchStructureResult s_result = current_surface->MatchesStructure(params); + if (s_result != MatchStructureResult::None && + current_surface->GetGpuAddr() == gpu_addr && + (params.target != SurfaceTarget::Texture3D || + current_surface->MatchTarget(params.target))) { + if (s_result == MatchStructureResult::FullMatch) { + return ManageStructuralMatch(current_surface, params); + } else { + return RebuildSurface(current_surface, params); + } + } + } + const std::size_t candidate_size = 
params.GetGuestSizeInBytes(); auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; @@ -410,17 +430,6 @@ private: if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; - MatchStructureResult s_result = current_surface->MatchesStructure(params); - if (s_result != MatchStructureResult::None && - current_surface->GetGpuAddr() == gpu_addr && - (params.target != SurfaceTarget::Texture3D || - current_surface->MatchTarget(params.target))) { - if (s_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params); - } else { - return RebuildSurface(current_surface, params); - } - } if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } @@ -473,8 +482,10 @@ private: } void RegisterInnerCache(TSurface& surface) { - CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr cache_addr = surface->GetCacheAddr(); + CacheAddr start = cache_addr >> registry_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; + l1_cache[cache_addr] = surface; while (start <= end) { registry[start].push_back(surface); start++; @@ -482,8 +493,10 @@ private: } void UnregisterInnerCache(TSurface& surface) { - CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr cache_addr = surface->GetCacheAddr(); + CacheAddr start = cache_addr >> registry_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; + l1_cache.erase(cache_addr); while (start <= end) { auto& reg{registry[start]}; reg.erase(std::find(reg.begin(), reg.end(), surface)); @@ -559,6 +572,10 @@ private: static constexpr u64 registry_page_size{1 << registry_page_bits}; std::unordered_map> registry; + // The L1 Cache is used for fast texture lookup before checking the overlaps + // This avoids calculating size and other stuffs. 
+ std::unordered_map l1_cache; + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. -- cgit v1.2.3 From 94f2be5473182789ec3f6388b43fcd708a505500 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 22:12:35 -0400 Subject: texture_cache: Optimize GetMipBlockHeight and GetMipBlockDepth --- src/video_core/texture_cache/surface_params.cpp | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 3a47f404d..e7e671d8c 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -5,6 +5,7 @@ #include #include "common/alignment.h" +#include "common/bit_util.h" #include "common/cityhash.h" #include "core/core.h" #include "video_core/engines/shader_bytecode.h" @@ -190,11 +191,8 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { const u32 height{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; - u32 block_height = 4; - while (block_height > 0 && blocks_in_y <= (1U << block_height) * 4) { - --block_height; - } - return block_height; + const u32 block_height = Common::Log2Ceil32(blocks_in_y); + return std::clamp(block_height, 3U, 8U) - 3U; } u32 SurfaceParams::GetMipBlockDepth(u32 level) const { @@ -206,15 +204,10 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { } const u32 depth{GetMipDepth(level)}; - u32 block_depth = 5; - while (block_depth > 0 && depth * 2 <= (1U << block_depth)) { - --block_depth; - } - - if (block_depth == 5 && GetMipBlockHeight(level) >= 2) { - return 4; + const u32 block_depth = 
Common::Log2Ceil32(depth); + if (block_depth > 4) { + return 5 - (GetMipBlockHeight(level) >= 2); } - return block_depth; } -- cgit v1.2.3 From 5192521dc3f752c385de356158706899f523e498 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 22:26:46 -0400 Subject: texture_cache: Implement GPU Dirty Flags --- src/video_core/texture_cache/texture_cache.h | 37 +++++++++++++++++----------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ad0fbd7ce..8aa0d6515 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -81,17 +81,22 @@ public: if (!gpu_addr) { return {}; } - if (gpu_addr == 0x1b7ec0000) { - // __debugbreak(); - } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; return GetSurface(gpu_addr, params, true).second; } TView GetDepthBufferSurface(bool preserve_contents) { - const auto& regs{system.GPU().Maxwell3D().regs}; + auto& maxwell3d = system.GPU().Maxwell3D(); + + if (!maxwell3d.dirty_flags.zeta_buffer) { + return depth_buffer.view; + } + maxwell3d.dirty_flags.zeta_buffer = false; + + const auto& regs{maxwell3d.regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { + SetEmptyDepthBuffer(); return {}; } const auto depth_params{SurfaceParams::CreateForDepthBuffer( @@ -101,6 +106,8 @@ public: auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); if (depth_buffer.target) depth_buffer.target->MarkAsProtected(false); + depth_buffer.target = surface_view.first; + depth_buffer.view = surface_view.second; if (depth_buffer.target) depth_buffer.target->MarkAsProtected(true); return surface_view.second; @@ -108,8 +115,13 @@ public: TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 
+ auto& maxwell3d = system.GPU().Maxwell3D(); + if (!maxwell3d.dirty_flags.color_buffer[index]) { + return render_targets[index].view; + } + maxwell3d.dirty_flags.color_buffer.reset(index); - const auto& regs{system.GPU().Maxwell3D().regs}; + const auto& regs{maxwell3d.regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { SetEmptyColorBuffer(index); @@ -128,6 +140,7 @@ public: if (render_targets[index].target) render_targets[index].target->MarkAsProtected(false); render_targets[index].target = surface_view.first; + render_targets[index].view = surface_view.second; if (render_targets[index].target) render_targets[index].target->MarkAsProtected(true); return surface_view.second; @@ -154,7 +167,6 @@ public: void SetEmptyColorBuffer(std::size_t index) { if (render_targets[index].target != nullptr) { render_targets[index].target->MarkAsProtected(false); - std::memset(&render_targets[index].config, sizeof(RenderTargetConfig), 0); render_targets[index].target = nullptr; render_targets[index].view = nullptr; } @@ -545,13 +557,7 @@ private: return {}; } - struct RenderInfo { - RenderTargetConfig config; - TSurface target; - TView view; - }; - - struct DepthBufferInfo { + struct FramebufferTargetInfo { TSurface target; TView view; }; @@ -580,8 +586,9 @@ private: /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. 
std::unordered_map> surface_reserve; - std::array render_targets; - DepthBufferInfo depth_buffer; + std::array + render_targets; + FramebufferTargetInfo depth_buffer; std::vector staging_buffer; }; -- cgit v1.2.3 From 1bbc9debfbcbd960874e2f877604506d174f613c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 23:42:08 -0400 Subject: Remove Framebuffer reconfiguration and restrict rendertarget protection --- src/video_core/texture_cache/surface_base.h | 15 +++++++++----- src/video_core/texture_cache/texture_cache.h | 30 +++++++++------------------- 2 files changed, 19 insertions(+), 26 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 095deb602..78db2d665 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -218,12 +218,12 @@ public: virtual void DownloadTexture(std::vector& staging_buffer) = 0; void MarkAsModified(const bool is_modified_, const u64 tick) { - is_modified = is_modified_ || is_protected; + is_modified = is_modified_ || is_target; modification_tick = tick; } - void MarkAsProtected(const bool is_protected) { - this->is_protected = is_protected; + void MarkAsRenderTarget(const bool is_target) { + this->is_target = is_target; } void MarkAsPicked(const bool is_picked) { @@ -235,7 +235,12 @@ public: } bool IsProtected() const { - return is_protected; + // Only 3D Slices are to be protected + return is_target && params.block_depth > 0; + } + + bool IsRenderTarget() const { + return is_target; } bool IsRegistered() const { @@ -307,7 +312,7 @@ private: } bool is_modified{}; - bool is_protected{}; + bool is_target{}; bool is_registered{}; bool is_picked{}; u64 modification_tick{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8aa0d6515..4ac5668c8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ 
b/src/video_core/texture_cache/texture_cache.h @@ -105,11 +105,11 @@ public: regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); if (depth_buffer.target) - depth_buffer.target->MarkAsProtected(false); + depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target = surface_view.first; depth_buffer.view = surface_view.second; if (depth_buffer.target) - depth_buffer.target->MarkAsProtected(true); + depth_buffer.target->MarkAsRenderTarget(true); return surface_view.second; } @@ -138,11 +138,11 @@ public: auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) - render_targets[index].target->MarkAsProtected(false); + render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target = surface_view.first; render_targets[index].view = surface_view.second; if (render_targets[index].target) - render_targets[index].target->MarkAsProtected(true); + render_targets[index].target->MarkAsRenderTarget(true); return surface_view.second; } @@ -158,7 +158,7 @@ public: void SetEmptyDepthBuffer() { if (depth_buffer.target != nullptr) { - depth_buffer.target->MarkAsProtected(false); + depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target = nullptr; depth_buffer.view = nullptr; } @@ -166,7 +166,7 @@ public: void SetEmptyColorBuffer(std::size_t index) { if (render_targets[index].target != nullptr) { - render_targets[index].target->MarkAsProtected(false); + render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target = nullptr; render_targets[index].view = nullptr; } @@ -200,12 +200,6 @@ public: return ++ticks; } - bool ConsumeReconfigurationFlag() { - const bool result = force_reconfiguration; - force_reconfiguration = false; - return result; - } - protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : 
system{system}, rasterizer{rasterizer} { @@ -242,8 +236,8 @@ protected: rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } - void Unregister(TSurface surface, const bool force_unregister = false) { - if (surface->IsProtected() && !force_unregister) { + void Unregister(TSurface surface) { + if (surface->IsProtected()) { return; } const GPUVAddr gpu_addr = surface->GetGpuAddr(); @@ -392,10 +386,8 @@ private: std::min(src_params.height, dst_height), 1); ImageCopy(surface, new_surface, copy_params); } - force_reconfiguration = false; for (auto surface : overlaps) { - force_reconfiguration |= surface->IsProtected(); - Unregister(surface, true); + Unregister(surface); } new_surface->MarkAsModified(modified, Tick()); Register(new_surface); @@ -567,10 +559,6 @@ private: u64 ticks{}; - // Sometimes Setup Textures can hit a surface that's on the render target, when this happens - // we force a reconfiguration of the frame buffer after setup. - bool force_reconfiguration; - // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly // large in size. 
-- cgit v1.2.3 From 07cc7e0c12143a84744abb8dc03eb46eb615b308 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 23:50:01 -0400 Subject: texture_cache: Add ASync Protections --- src/video_core/texture_cache/texture_cache.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4ac5668c8..1b8ada910 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include @@ -56,12 +57,16 @@ public: } void InvalidateRegion(CacheAddr addr, std::size_t size) { + std::lock_guard lock{mutex}; + for (const auto& surface : GetSurfacesInRegion(addr, size)) { Unregister(surface); } } void FlushRegion(CacheAddr addr, std::size_t size) { + std::lock_guard lock{mutex}; + auto surfaces = GetSurfacesInRegion(addr, size); if (surfaces.empty()) { return; @@ -220,6 +225,8 @@ protected: const Common::Rectangle& dst_rect) = 0; void Register(TSurface surface) { + std::lock_guard lock{mutex}; + const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); @@ -237,6 +244,8 @@ protected: } void Unregister(TSurface surface) { + std::lock_guard lock{mutex}; + if (surface->IsProtected()) { return; } @@ -579,6 +588,7 @@ private: FramebufferTargetInfo depth_buffer; std::vector staging_buffer; + std::recursive_mutex mutex; }; } // namespace VideoCommon -- cgit v1.2.3 From 2131f715730580dfeb692acdf3ae3e62ffd455c1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 23:02:14 -0300 Subject: surface_params: Optimize CreateForTexture Instead of using Common::AlignUp, use Common::AlignBits to align the texture compression factor. 
--- src/video_core/texture_cache/surface_params.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index e7e671d8c..6f39f8468 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -76,8 +76,10 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); - params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); - params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); + params.width = + Common::AlignBits(config.tic.Width(), GetCompressionFactorShift(params.pixel_format)); + params.height = + Common::AlignBits(config.tic.Height(), GetCompressionFactorShift(params.pixel_format)); params.depth = config.tic.Depth(); if (params.target == SurfaceTarget::TextureCubemap || params.target == SurfaceTarget::TextureCubeArray) { -- cgit v1.2.3 From d65a4af89582f272efbbfd47d1ee78e616553312 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 11 May 2019 01:21:02 -0400 Subject: texture_cache return invalid buffer on deactivated color_mask --- src/video_core/texture_cache/texture_cache.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1b8ada910..7058399e2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -133,6 +133,11 @@ public: return {}; } + if (regs.color_mask[index].raw != 0) { + SetEmptyColorBuffer(index); + return {}; + } + const auto& config{regs.rt[index]}; const auto 
gpu_addr{config.Address()}; if (!gpu_addr) { -- cgit v1.2.3 From 9098905dd13bb68f2fe49a9590688b76cc999fdd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 11 May 2019 03:15:49 -0300 Subject: gl_framebuffer_cache: Use a hashed struct to cache framebuffers --- src/video_core/texture_cache/texture_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7058399e2..419c0de5e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -133,7 +133,7 @@ public: return {}; } - if (regs.color_mask[index].raw != 0) { + if (regs.color_mask[index].raw == 0) { SetEmptyColorBuffer(index); return {}; } -- cgit v1.2.3 From c2ed348bddc1cd1bd97ce789d7855b1571e45ef4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 12 May 2019 18:31:03 -0300 Subject: surface_params: Ensure pitch is always written to avoid surface leaks --- src/video_core/texture_cache/surface_params.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 6f39f8468..8472b69dc 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -111,6 +111,7 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.unaligned_height = zeta_height; params.target = SurfaceTarget::Texture2D; params.depth = 1; + params.pitch = 0; params.num_levels = 1; params.is_layered = false; return params; @@ -131,6 +132,7 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); if (params.is_tiled) { + params.pitch = 0; params.width = config.width; } else { const u32 bpp = 
GetFormatBpp(params.pixel_format) / CHAR_BIT; -- cgit v1.2.3 From 7731a0e2d15da04eea746b4b8dd5c6c4b29f9f29 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 12 May 2019 20:33:52 -0400 Subject: texture_cache: General Fixes Fixed ASTC mipmaps loading Fixed alignment on openGL upload/download Fixed Block Height Calculation Removed unalign_height --- src/video_core/texture_cache/surface_base.cpp | 18 +++++++-- src/video_core/texture_cache/surface_base.h | 4 ++ src/video_core/texture_cache/surface_params.cpp | 52 ++++++++++++++----------- src/video_core/texture_cache/surface_params.h | 27 +++++++++---- 4 files changed, 68 insertions(+), 33 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 510d1aef5..ceff51043 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -17,6 +17,7 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; +using VideoCore::Surface::SurfaceCompression; SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) : params{params}, mipmap_sizes(params.num_levels), @@ -102,9 +103,20 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; - ConvertFromGuestToHost(staging_buffer.data() + host_offset, params.pixel_format, + auto compression_type = params.GetCompressionType(); + if (compression_type == SurfaceCompression::None || + compression_type == SurfaceCompression::Compressed) + return; + + for (u32 level_up = params.num_levels; level_up > 0; --level_up) { + const u32 level = level_up - 1; + const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)}; + const std::size_t 
out_host_offset = compression_type == SurfaceCompression::Rearranged + ? in_host_offset + : params.GetConvertedMipmapOffset(level); + u8* in_buffer = staging_buffer.data() + in_host_offset; + u8* out_buffer = staging_buffer.data() + out_host_offset; + ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, params.GetMipWidth(level), params.GetMipHeight(level), params.GetMipDepth(level), true, true); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 78db2d665..cb7f22706 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -93,6 +93,10 @@ public: return mipmap_sizes[level]; } + bool IsLinear() const { + return !params.is_tiled; + } + bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { return params.pixel_format == pixel_format; } diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 8472b69dc..d9d157d02 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -76,17 +76,14 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); - params.width = - Common::AlignBits(config.tic.Width(), GetCompressionFactorShift(params.pixel_format)); - params.height = - Common::AlignBits(config.tic.Height(), GetCompressionFactorShift(params.pixel_format)); + params.width = config.tic.Width(); + params.height = config.tic.Height(); params.depth = config.tic.Depth(); if (params.target == SurfaceTarget::TextureCubemap || params.target == SurfaceTarget::TextureCubeArray) { params.depth *= 6; } params.pitch = params.is_tiled ? 
0 : config.tic.Pitch(); - params.unaligned_height = config.tic.Height(); params.num_levels = config.tic.max_mip_level + 1; params.is_layered = params.IsLayered(); return params; @@ -108,7 +105,6 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.type = GetFormatType(params.pixel_format); params.width = zeta_width; params.height = zeta_height; - params.unaligned_height = zeta_height; params.target = SurfaceTarget::Texture2D; params.depth = 1; params.pitch = 0; @@ -141,7 +137,6 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz } params.height = config.height; params.depth = 1; - params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; params.num_levels = 1; params.is_layered = false; @@ -164,7 +159,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.width = config.width; params.height = config.height; params.pitch = config.pitch; - params.unaligned_height = config.height; // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters params.target = SurfaceTarget::Texture2D; params.depth = 1; @@ -185,18 +179,18 @@ bool SurfaceParams::IsLayered() const { } } +// Auto block resizing algorithm from: +// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c u32 SurfaceParams::GetMipBlockHeight(u32 level) const { - // Auto block resizing algorithm from: - // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c if (level == 0) { return this->block_height; } - const u32 height{GetMipHeight(level)}; + const u32 height_new{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; - const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; - const u32 block_height = Common::Log2Ceil32(blocks_in_y); - return std::clamp(block_height, 3U, 8U) - 3U; + const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height}; + const u32 
block_height_new = Common::Log2Ceil32(blocks_in_y); + return std::clamp(block_height_new, 3U, 7U) - 3U; } u32 SurfaceParams::GetMipBlockDepth(u32 level) const { @@ -207,12 +201,12 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { return 0; } - const u32 depth{GetMipDepth(level)}; - const u32 block_depth = Common::Log2Ceil32(depth); - if (block_depth > 4) { + const u32 depth_new{GetMipDepth(level)}; + const u32 block_depth_new = Common::Log2Ceil32(depth_new); + if (block_depth_new > 4) { return 5 - (GetMipBlockHeight(level) >= 2); } - return block_depth; + return block_depth_new; } std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { @@ -231,6 +225,14 @@ std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { return offset; } +std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const { + std::size_t offset = 0; + for (u32 i = 0; i < level; i++) { + offset += GetConvertedMipmapSize(i); + } + return offset; +} + std::size_t SurfaceParams::GetGuestMipmapSize(u32 level) const { return GetInnerMipmapMemorySize(level, false, false); } @@ -239,6 +241,14 @@ std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); } +std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { + constexpr std::size_t rgb8_bpp = 4ULL; + const std::size_t width_t = GetMipWidth(level); + const std::size_t height_t = GetMipHeight(level); + const std::size_t depth_t = is_layered ? 
depth : GetMipDepth(level); + return width_t * height_t * depth_t * rgb8_bpp; +} + std::size_t SurfaceParams::GetGuestLayerSize() const { return GetLayerSize(false, false); } @@ -287,12 +297,10 @@ std::size_t SurfaceParams::Hash() const { bool SurfaceParams::operator==(const SurfaceParams& rhs) const { return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, - height, depth, pitch, unaligned_height, num_levels, pixel_format, - component_type, type, target) == + height, depth, pitch, num_levels, pixel_format, component_type, type, target) == std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, - rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type, - rhs.type, rhs.target); + rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target); } std::string SurfaceParams::TargetName() const { diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 7c48782c7..b3082173f 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -7,6 +7,7 @@ #include #include "common/alignment.h" +#include "common/bit_util.h" #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ -16,6 +17,8 @@ namespace VideoCommon { +using VideoCore::Surface::SurfaceCompression; + class SurfaceParams { public: /// Creates SurfaceCachedParams from a texture configuration. 
@@ -50,17 +53,12 @@ public: std::size_t GetHostSizeInBytes() const { std::size_t host_size_in_bytes; - if (IsPixelFormatASTC(pixel_format)) { + if (GetCompressionType() == SurfaceCompression::Converted) { constexpr std::size_t rgb8_bpp = 4ULL; // ASTC is uncompressed in software, in emulated as RGBA8 host_size_in_bytes = 0; for (u32 level = 0; level < num_levels; ++level) { - const std::size_t width = - Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); - const std::size_t height = - Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); - const std::size_t depth = is_layered ? this->depth : GetMipDepth(level); - host_size_in_bytes += width * height * depth * rgb8_bpp; + host_size_in_bytes += GetConvertedMipmapSize(level); } } else { host_size_in_bytes = GetInnerMemorySize(true, false, false); @@ -93,6 +91,12 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; + u32 GetRowAlignment(u32 level) const { + const u32 bpp = + GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); + return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); + } + // Helper used for out of class size calculations static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, const u32 block_depth) { @@ -106,12 +110,16 @@ public: /// Returns the offset in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapLevelOffset(u32 level) const; + std::size_t GetConvertedMipmapOffset(u32 level) const; + /// Returns the size in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapSize(u32 level) const; /// Returns the size in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapSize(u32 level) const; + std::size_t GetConvertedMipmapSize(u32 level) const; + /// Returns the size of a layer in bytes in guest memory. 
std::size_t GetGuestLayerSize() const; @@ -141,6 +149,10 @@ public: /// Returns true if the pixel format is a depth and/or stencil format. bool IsPixelFormatZeta() const; + SurfaceCompression GetCompressionType() const { + return VideoCore::Surface::GetFormatCompressionType(pixel_format); + } + std::string TargetName() const; bool is_tiled; @@ -154,7 +166,6 @@ public: u32 height; u32 depth; u32 pitch; - u32 unaligned_height; u32 num_levels; VideoCore::Surface::PixelFormat pixel_format; VideoCore::Surface::ComponentType component_type; -- cgit v1.2.3 From a79831d9d02f7c42d82ea36210cac7952a3ef16e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 13 May 2019 19:14:02 -0400 Subject: texture_cache: Implement Guard mechanism --- src/video_core/texture_cache/texture_cache.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 419c0de5e..2ad6210dd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -64,6 +64,10 @@ public: } } + void Guard(bool new_guard) { + guard_cache = new_guard; + } + void FlushRegion(CacheAddr addr, std::size_t size) { std::lock_guard lock{mutex}; @@ -251,7 +255,7 @@ protected: void Unregister(TSurface surface) { std::lock_guard lock{mutex}; - if (surface->IsProtected()) { + if (guard_cache && surface->IsProtected()) { return; } const GPUVAddr gpu_addr = surface->GetGpuAddr(); @@ -573,6 +577,9 @@ private: u64 ticks{}; + // Guards the cache for protection conflicts. + bool guard_cache{}; + // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly // large in size. 
-- cgit v1.2.3 From 4530511ee4dfc92ddbfed7f91978f332be517c90 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 13 May 2019 21:35:32 -0400 Subject: texture_cache: Try to Reconstruct Surface on bigger than overlap. This fixes clouds in SMO Cap Kingdom and lens on Cloud Kingdom. Also moved accurate_gpu setting check to Pick Strategy --- src/video_core/texture_cache/texture_cache.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2ad6210dd..38b56475f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -295,6 +295,9 @@ private: RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { + if (Settings::values.use_accurate_gpu_emulation) { + return RecycleStrategy::Flush; + } // 3D Textures decision if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -319,10 +322,7 @@ private: for (auto surface : overlaps) { Unregister(surface); } - RecycleStrategy strategy = !Settings::values.use_accurate_gpu_emulation - ? 
? PickStrategy(overlaps, params, gpu_addr, untopological) - : RecycleStrategy::Flush; - switch (strategy) { + switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { case RecycleStrategy::Ignore: { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -453,6 +453,13 @@ private: if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { + if (current_surface->GetGpuAddr() == gpu_addr) { + std::optional> view = + ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + if (view.has_value()) { + return *view; + } + } return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } std::optional view = -- cgit v1.2.3 From 6162cb922e67c6c529fb17a91da726fdf3444a50 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 13 May 2019 22:59:18 -0400 Subject: texture_cache: Document the most important methods. --- src/video_core/texture_cache/texture_cache.h | 95 +++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 8 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 38b56475f..04e9528b8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -64,6 +64,10 @@ public: } } + /** + * `Guard` guarantees that rendertargets don't unregister themselves if they + * collide. Protection is currently only done on 3D slices. + **/ void Guard(bool new_guard) { guard_cache = new_guard; } @@ -293,6 +297,14 @@ private: BufferCopy = 3, }; + /** + * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. + * @param overlaps, the overlapping surfaces registered in the cache. + * @param params, the parameters of the new surface. + * @param gpu_addr, the starting address of the new surface. 
+ * @param untopological, tells the recycler that the texture has no way to match the overlaps + * due to topological reasons. + **/ RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { if (Settings::values.use_accurate_gpu_emulation) { @@ -315,6 +327,18 @@ private: return RecycleStrategy::Ignore; } + /** + * `RecycleSurface` is a method we use to decide what to do with textures we can't resolve in + *the cache. It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the + *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the + *new surface from that data. + * @param overlaps, the overlapping surfaces registered in the cache. + * @param params, the parameters of the new surface. + * @param gpu_addr, the starting address of the new surface. + * @param preserve_contents, tells if the new surface should be loaded from memory or left blank + * @param untopological, tells the recycler that the texture has no way to match the overlaps + * due to topological reasons. + **/ std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, @@ -343,6 +367,12 @@ private: } } + /** + * `RebuildSurface` this method takes a single surface and recreates it into another that + * may differ in format, target or width alignment. + * @param current_surface, the registered surface in the cache which we want to convert. + * @param params, the new surface params which we'll use to recreate the surface. 
+ **/ std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); @@ -357,6 +387,14 @@ private: return {new_surface, new_surface->GetMainView()}; } + /** + * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's + * params if it's an exact match, we return the main view of the registered surface. If its + * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats + * match but the targets don't, we create an overview View of the registered surface. + * @param current_surface, the registered surface in the cache which we want to convert. + * @param params, the new surface params which we want to check. + **/ std::pair ManageStructuralMatch(TSurface current_surface, const SurfaceParams& params) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); @@ -370,10 +408,18 @@ private: return {current_surface, current_surface->EmplaceOverview(params)}; } - std::optional> ReconstructSurface(std::vector& overlaps, - const SurfaceParams& params, - const GPUVAddr gpu_addr, - const u8* host_ptr) { + /** + * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface + * matches the candidate in some way, we got no guarantees here. We try to see if the overlaps + * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface + * for them, else we return nothing. + * @param overlaps, the overlapping surfaces registered in the cache. + * @param params, the parameters of the new surface. + * @param gpu_addr, the starting address of the new surface. 
+ **/ + std::optional> TryReconstructSurface(std::vector& overlaps, + const SurfaceParams& params, + const GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { return {}; } @@ -412,12 +458,30 @@ private: return {{new_surface, new_surface->GetMainView()}}; } + /** + * `GetSurface` gets the starting address and parameters of a candidate surface and tries + * to find a matching surface within the cache. This is done in 3 big steps. The first is to + * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. + * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from + * memory else we move to step 3. Step 3 consists of figuring out the relationship between the + * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many + * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the + * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to + * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface + * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface. + * @param gpu_addr, the starting address of the candidate surface. + * @param params, the parameters of the candidate surface. + * @param preserve_contents, tells if the new surface should be loaded from memory or left blank. + **/ std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + // Step 1 + // Check Level 1 Cache for a fast structural match. If candidate surface + // matches at certain level we are pretty much done. 
if (l1_cache.count(cache_addr) > 0) { TSurface current_surface = l1_cache[cache_addr]; if (!current_surface->MatchesTopology(params)) { @@ -437,31 +501,43 @@ private: } } + // Step 2 + // Obtain all possible overlaps in the memory region const std::size_t candidate_size = params.GetGuestSizeInBytes(); auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + // If none are found, we are done. We just load the surface and create it. if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } + // Step 3 + // Now we need to figure out the relationship between the texture and its overlaps + // we do a topological test to ensure we can find some relationship. If it fails + // immediately recycle the texture for (auto surface : overlaps) { if (!surface->MatchesTopology(params)) { return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); } } + // Split cases between 1 overlap or many. if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; + // First check if the surface is within the overlap. If not, it means + // one of two things: either the candidate surface is a supertexture of the overlap + // or they don't match in any known way. 
if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { if (current_surface->GetGpuAddr() == gpu_addr) { std::optional> view = - ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + TryReconstructSurface(overlaps, params, gpu_addr); if (view.has_value()) { return *view; } } return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } + // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = current_surface->EmplaceView(params, gpu_addr, candidate_size); if (view.has_value()) { @@ -472,15 +548,18 @@ private: } return {current_surface, *view}; } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } else { + // If there are many overlaps, odds are they are subtextures of the candidate + // surface. We try to construct a new surface based on the candidate parameters, + // using the overlaps. If a single overlap fails, this will fail. std::optional> view = - ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + TryReconstructSurface(overlaps, params, gpu_addr); if (view.has_value()) { return *view; } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } + // We failed all the tests, recycle the overlaps into a new texture. + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, -- cgit v1.2.3 From d267948a73d2364949660a24d07833ea05c9fcc8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 14 May 2019 00:55:32 -0400 Subject: texture_cache: loose TryReconstructSurface when accurate GPU is not on. Also corrects some asserts. 
--- src/video_core/texture_cache/surface_base.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index ceff51043..d4aa2c54b 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -130,7 +130,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, return; } if (params.is_tiled) { - ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); + ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 04e9528b8..85c9160e0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -425,6 +425,7 @@ private: } bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); + u32 passed_tests = 0; for (auto surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); if (src_params.is_layered || src_params.num_levels > 1) { @@ -434,12 +435,12 @@ private: const std::size_t candidate_size = surface->GetSizeInBytes(); auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { - return {}; + continue; } const u32 layer{mipmap_layer->first}; const u32 mipmap{mipmap_layer->second}; if (new_surface->GetMipmapSize(mipmap) != candidate_size) { - return {}; + continue; } modified |= surface->IsModified(); // Now we got all the data set up @@ -448,8 +449,15 @@ private: const CopyParams 
copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, std::min(src_params.width, dst_width), std::min(src_params.height, dst_height), 1); + passed_tests++; ImageCopy(surface, new_surface, copy_params); } + if (passed_tests == 0) { + return {}; + // In Accurate GPU all tests should pass, else we recycle + } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { + return {}; + } for (auto surface : overlaps) { Unregister(surface); } @@ -548,6 +556,14 @@ private: } return {current_surface, *view}; } + // The next case is unsafe, so if we are in accurate GPU, just skip it + if (Settings::values.use_accurate_gpu_emulation) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + } + // This is the case the texture is a part of the parent. + if (current_surface->MatchesSubTexture(params, gpu_addr)) { + return RebuildSurface(current_surface, params); + } } else { // If there are many overlaps, odds are they are subtextures of the candidate // surface. We try to construct a new surface based on the candidate parameters, -- cgit v1.2.3 From 175aa343ff1c9f931b266caf2d19b8df943dab0d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 18 May 2019 04:57:49 -0400 Subject: texture_cache: Fermi2D reform and implement View Mirage This also does some fixes on compressed textures reinterpret and on the Fermi2D engine in general. 
--- src/video_core/texture_cache/surface_base.h | 15 ++++++---- src/video_core/texture_cache/surface_params.h | 14 ++++++++++ src/video_core/texture_cache/texture_cache.h | 40 +++++++++++++++++---------- 3 files changed, 49 insertions(+), 20 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index cb7f22706..a3dd1c607 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -126,14 +126,19 @@ public: return MatchStructureResult::None; } // Tiled surface - if (std::tie(params.height, params.depth, params.block_width, params.block_height, - params.block_depth, params.tile_width_spacing, params.num_levels) == - std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, + if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, + params.tile_width_spacing, params.num_levels) == + std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, rhs.tile_width_spacing, rhs.num_levels)) { - if (params.width == rhs.width) { + if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { return MatchStructureResult::FullMatch; } - if (params.GetBlockAlignedWidth() == rhs.GetBlockAlignedWidth()) { + const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), + params.pixel_format, rhs.pixel_format); + const u32 hs = + SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); + const u32 w1 = params.GetBlockAlignedWidth(); + if (std::tie(w1, params.height) == std::tie(ws, hs)) { return MatchStructureResult::SemiMatch; } } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index b3082173f..13a08a60f 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -126,6 +126,20 @@ public: /// Returns 
the size of a layer in bytes in host memory for a given mipmap level. std::size_t GetHostLayerSize(u32 level) const; + static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, + VideoCore::Surface::PixelFormat pixel_format_to) { + const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); + const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); + return (width * bw2 + bw1 - 1) / bw1; + } + + static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, + VideoCore::Surface::PixelFormat pixel_format_to) { + const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); + const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); + return (height * bh2 + bh1 - 1) / bh1; + } + /// Returns the default block width. u32 GetDefaultBlockWidth() const { return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 85c9160e0..593ceeaf6 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -141,11 +141,6 @@ public: return {}; } - if (regs.color_mask[index].raw == 0) { - SetEmptyColorBuffer(index); - return {}; - } - const auto& config{regs.rt[index]}; const auto gpu_addr{config.Address()}; if (!gpu_addr) { @@ -192,11 +187,11 @@ public: void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) { - TSurface dst_surface = GetFermiSurface(dst_config); - ImageBlit(GetFermiSurface(src_config), dst_surface, src_rect, dst_rect); - dst_surface->MarkAsModified(true, Tick()); + const Tegra::Engines::Fermi2D::Config& copy_config) { + std::pair dst_surface = GetFermiSurface(dst_config); + std::pair src_surface = GetFermiSurface(src_config); + 
ImageBlit(src_surface.second, dst_surface.second, copy_config); + dst_surface.first->MarkAsModified(true, Tick()); } TSurface TryFindFramebufferSurface(const u8* host_ptr) { @@ -234,8 +229,8 @@ protected: virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, const CopyParams& copy_params) = 0; - virtual void ImageBlit(TSurface src, TSurface dst, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) = 0; + virtual void ImageBlit(TView src_view, TView dst_view, + const Tegra::Engines::Fermi2D::Config& copy_config) = 0; void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -282,10 +277,11 @@ protected: return new_surface; } - TSurface GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + std::pair GetFermiSurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config) { SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); const GPUVAddr gpu_addr = config.Address(); - return GetSurface(gpu_addr, params, true).first; + return GetSurface(gpu_addr, params, true); } Core::System& system; @@ -551,7 +547,21 @@ private: if (view.has_value()) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { - LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); + // On a mirage view, we need to recreate the surface under this new view + // and then obtain a view again. 
+ SurfaceParams new_params = current_surface->GetSurfaceParams(); + const u32 wh = SurfaceParams::ConvertWidth( + new_params.width, new_params.pixel_format, params.pixel_format); + const u32 hh = SurfaceParams::ConvertHeight( + new_params.height, new_params.pixel_format, params.pixel_format); + new_params.width = wh; + new_params.height = hh; + new_params.pixel_format = params.pixel_format; + std::pair pair = RebuildSurface(current_surface, new_params); + std::optional mirage_view = + pair.first->EmplaceView(params, gpu_addr, candidate_size); + if (mirage_view) + return {pair.first, *mirage_view}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } return {current_surface, *view}; -- cgit v1.2.3 From fcac55d5bff025fee822c2e7b0e06cdc178143dc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 07:56:53 -0400 Subject: texture_cache: Add checks for texture buffers. --- src/video_core/texture_cache/surface_base.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index a3dd1c607..210f27907 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -114,10 +114,23 @@ public: bool MatchesTopology(const SurfaceParams& rhs) const { const u32 src_bpp{params.GetBytesPerPixel()}; const u32 dst_bpp{rhs.GetBytesPerPixel()}; - return std::tie(src_bpp, params.is_tiled) == std::tie(dst_bpp, rhs.is_tiled); + const bool ib1 = params.IsBuffer(); + const bool ib2 = rhs.IsBuffer(); + return std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2); } MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { + // Buffer surface Check + if (params.IsBuffer()) { + const std::size_t wd1 = params.width*params.GetBytesPerPixel(); + const std::size_t wd2 = rhs.width*rhs.GetBytesPerPixel(); + if 
(wd1 == wd2) { + return MatchStructureResult::FullMatch; + } + return MatchStructureResult::None; + } + + // Linear Surface check if (!params.is_tiled) { if (std::tie(params.width, params.height, params.pitch) == std::tie(rhs.width, rhs.height, rhs.pitch)) { @@ -125,7 +138,8 @@ public: } return MatchStructureResult::None; } - // Tiled surface + + // Tiled Surface check if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, params.tile_width_spacing, params.num_levels) == std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, -- cgit v1.2.3 From e60ed2bb3e7e4ce63cc263019cce72a080c536ed Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 08:36:00 -0400 Subject: texture_cache: return null surface on invalid address --- src/video_core/texture_cache/texture_cache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 593ceeaf6..24c87127d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -483,6 +483,18 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + // Step 0: guarantee a valid surface + if (!cache_addr) { + // Return a null surface if it's invalid + SurfaceParams new_params = params; + new_params.width = 1; + new_params.height = 1; + new_params.depth = 1; + new_params.block_height = 0; + new_params.block_depth = 0; + return InitializeSurface(gpu_addr, new_params, false); + } + // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. -- cgit v1.2.3 From bdf9faab331cd79ca5c5e51c2369fc801e8cecea Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 11:24:20 -0400 Subject: texture_cache: Handle uncontinuous surfaces. 
--- src/video_core/texture_cache/surface_base.cpp | 47 ++++++++++++++++++++++----- src/video_core/texture_cache/surface_base.h | 34 ++++++++++++++++--- src/video_core/texture_cache/texture_cache.h | 17 ++++++---- 3 files changed, 79 insertions(+), 19 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index d4aa2c54b..7e90960f7 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -68,12 +68,27 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf } void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, - std::vector& staging_buffer) { + StagingCache& staging_cache) { MICROPROFILE_SCOPE(GPU_Load_Texture); - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; - if (!host_ptr) { - return; + auto& staging_buffer = staging_cache.GetBuffer(0); + u8* host_ptr; + is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); + + // Handle continuouty + if (is_continuous) { + // Use physical memory directly + host_ptr = memory_manager.GetPointer(gpu_addr); + if (!host_ptr) { + return; + } + } else { + // Use an extra temporal buffer + auto& tmp_buffer = staging_cache.GetBuffer(1); + tmp_buffer.resize(guest_memory_size); + host_ptr = tmp_buffer.data(); + memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); } + if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); @@ -123,12 +138,25 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, - std::vector& staging_buffer) { + StagingCache& staging_cache) { MICROPROFILE_SCOPE(GPU_Flush_Texture); - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; - if (!host_ptr) { - 
return; + auto& staging_buffer = staging_cache.GetBuffer(0); + u8* host_ptr; + + // Handle continuouty + if (is_continuous) { + // Use physical memory directly + host_ptr = memory_manager.GetPointer(gpu_addr); + if (!host_ptr) { + return; + } + } else { + // Use an extra temporal buffer + auto& tmp_buffer = staging_cache.GetBuffer(1); + tmp_buffer.resize(guest_memory_size); + host_ptr = tmp_buffer.data(); } + if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { @@ -154,6 +182,9 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, } } } + if (!is_continuous) { + memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); + } } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 210f27907..dacbc97c7 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -32,11 +32,28 @@ enum class MatchStructureResult : u32 { None = 2, }; +class StagingCache { +public: + StagingCache() {} + ~StagingCache() = default; + + std::vector& GetBuffer(std::size_t index) { + return staging_buffer[index]; + } + + void SetSize(std::size_t size) { + staging_buffer.resize(size); + } + +private: + std::vector> staging_buffer; +}; + class SurfaceBaseImpl { public: - void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); + void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); - void FlushBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); + void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); GPUVAddr GetGpuAddr() const { return gpu_addr; @@ -93,6 +110,14 @@ public: return mipmap_sizes[level]; } + void MarkAsContinuous(const bool is_continuous) { + this->is_continuous = is_continuous; + } + + bool 
IsContinuous() const { + return is_continuous; + } + bool IsLinear() const { return !params.is_tiled; } @@ -122,8 +147,8 @@ public: MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { // Buffer surface Check if (params.IsBuffer()) { - const std::size_t wd1 = params.width*params.GetBytesPerPixel(); - const std::size_t wd2 = rhs.width*rhs.GetBytesPerPixel(); + const std::size_t wd1 = params.width * params.GetBytesPerPixel(); + const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); if (wd1 == wd2) { return MatchStructureResult::FullMatch; } @@ -193,6 +218,7 @@ protected: CacheAddr cache_addr{}; CacheAddr cache_addr_end{}; VAddr cpu_addr{}; + bool is_continuous{}; std::vector mipmap_sizes; std::vector mipmap_offsets; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 24c87127d..ab4e094ea 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -220,6 +220,7 @@ protected: SetEmptyColorBuffer(i); } SetEmptyDepthBuffer(); + staging_cache.SetSize(2); } ~TextureCache() = default; @@ -244,6 +245,8 @@ protected: gpu_addr); return; } + bool continuouty = memory_manager->IsBlockContinuous(gpu_addr, size); + surface->MarkAsContinuous(continuouty); surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); RegisterInnerCache(surface); @@ -611,9 +614,9 @@ private: } void LoadSurface(const TSurface& surface) { - staging_buffer.resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(*memory_manager, staging_buffer); - surface->UploadTexture(staging_buffer); + staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); + surface->LoadBuffer(*memory_manager, staging_cache); + surface->UploadTexture(staging_cache.GetBuffer(0)); surface->MarkAsModified(false, Tick()); } @@ -621,9 +624,9 @@ private: if (!surface->IsModified()) { return; } - staging_buffer.resize(surface->GetHostSizeInBytes()); - 
surface->DownloadTexture(staging_buffer); - surface->FlushBuffer(*memory_manager, staging_buffer); + staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); + surface->DownloadTexture(staging_cache.GetBuffer(0)); + surface->FlushBuffer(*memory_manager, staging_cache); surface->MarkAsModified(false, Tick()); } @@ -723,7 +726,7 @@ private: render_targets; FramebufferTargetInfo depth_buffer; - std::vector staging_buffer; + StagingCache staging_cache; std::recursive_mutex mutex; }; -- cgit v1.2.3 From ea1525dab1bf7e9e56471b6d5fd50014bfeb4f96 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 12:48:28 -0400 Subject: Fix rebase errors --- src/video_core/texture_cache/surface_params.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 13a08a60f..d9aa0b521 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -167,6 +167,10 @@ public: return VideoCore::Surface::GetFormatCompressionType(pixel_format); } + bool IsBuffer() const { + return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; + } + std::string TargetName() const; bool is_tiled; -- cgit v1.2.3 From 0966665fc225eee29b3ed87baefd74f79c19d307 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 22 May 2019 12:30:53 -0400 Subject: texture_cache: Only load on recycle with accurate GPU. Testing so far has proven this to be quite safe as texture memory read added a 2-5ms load to the current cache. 
--- src/video_core/texture_cache/texture_cache.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ab4e094ea..685bd28f4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -342,12 +342,13 @@ private: const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, const bool untopological) { + const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; for (auto surface : overlaps) { Unregister(surface); } switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { case RecycleStrategy::Ignore: { - return InitializeSurface(gpu_addr, params, preserve_contents); + return InitializeSurface(gpu_addr, params, do_load); } case RecycleStrategy::Flush: { std::sort(overlaps.begin(), overlaps.end(), @@ -361,7 +362,7 @@ private: } default: { UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); - return InitializeSurface(gpu_addr, params, preserve_contents); + return InitializeSurface(gpu_addr, params, do_load); } } } -- cgit v1.2.3 From 92513541529e90f4f79a1f2c3f8ccf5a199e4c20 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 24 May 2019 11:59:23 -0400 Subject: texture_cache: Correct copying between compressed and uncompressed formats --- src/video_core/texture_cache/surface_base.h | 9 ++++----- src/video_core/texture_cache/surface_params.h | 20 ++++++++++++++++++++ src/video_core/texture_cache/texture_cache.h | 8 +++----- 3 files changed, 27 insertions(+), 10 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index dacbc97c7..77c2d6758 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -235,9 +235,8 @@ private: 
for (u32 layer = 0; layer < layers; layer++) { for (u32 level = 0; level < mipmaps; level++) { - const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; - const u32 height{ - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); + const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); result.emplace_back(width, height, layer, level); } } @@ -250,8 +249,8 @@ private: result.reserve(mipmaps); for (u32 level = 0; level < mipmaps; level++) { - const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; - const u32 height{std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); + const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; result.emplace_back(width, height, depth, level); } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index d9aa0b521..c3affd621 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -140,6 +140,26 @@ public: return (height * bh2 + bh1 - 1) / bh1; } + // this finds the maximun possible width between 2 2D layers of different formats + static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, + const u32 src_level, const u32 dst_level) { + const u32 bw1 = src_params.GetDefaultBlockWidth(); + const u32 bw2 = dst_params.GetDefaultBlockWidth(); + const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; + const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; + return std::min(t_src_width, t_dst_width); + } + + // this finds the maximun possible height between 2 
2D layers of different formats + static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, + const u32 src_level, const u32 dst_level) { + const u32 bh1 = src_params.GetDefaultBlockHeight(); + const u32 bh2 = dst_params.GetDefaultBlockHeight(); + const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; + const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; + return std::min(t_src_height, t_dst_height); + } + /// Returns the default block width. u32 GetDefaultBlockWidth() const { return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 685bd28f4..d2093e581 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -444,11 +444,9 @@ private: } modified |= surface->IsModified(); // Now we got all the data set up - const u32 dst_width{params.GetMipWidth(mipmap)}; - const u32 dst_height{params.GetMipHeight(mipmap)}; - const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, - std::min(src_params.width, dst_width), - std::min(src_params.height, dst_height), 1); + const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); + const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); + const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); passed_tests++; ImageCopy(surface, new_surface, copy_params); } -- cgit v1.2.3 From 228f516bb4426a41a4d1c1756751557f7a0eecda Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 24 May 2019 15:34:31 -0400 Subject: texture_cache uncompress-compress is untopological. This makes conflicts between non compress and compress textures to be auto recycled. It also limits the amount of mipmaps a texture can have if it goes above it's limit. 
--- src/video_core/texture_cache/surface_base.h | 18 +++++++++++++++-- src/video_core/texture_cache/surface_params.cpp | 4 ++++ src/video_core/texture_cache/surface_params.h | 14 +++++++++++++ src/video_core/texture_cache/texture_cache.h | 26 +++++++++++++------------ 4 files changed, 48 insertions(+), 14 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 77c2d6758..70b5258c9 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -32,6 +32,12 @@ enum class MatchStructureResult : u32 { None = 2, }; +enum class MatchTopologyResult : u32 { + FullMatch = 0, + CompressUnmatch = 1, + None = 2, +}; + class StagingCache { public: StagingCache() {} @@ -136,12 +142,20 @@ public: params.target == SurfaceTarget::Texture2D && params.num_levels == 1; } - bool MatchesTopology(const SurfaceParams& rhs) const { + MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const { const u32 src_bpp{params.GetBytesPerPixel()}; const u32 dst_bpp{rhs.GetBytesPerPixel()}; const bool ib1 = params.IsBuffer(); const bool ib2 = rhs.IsBuffer(); - return std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2); + if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { + const bool cb1 = params.IsCompressed(); + const bool cb2 = rhs.IsCompressed(); + if (cb1 == cb2) { + return MatchTopologyResult::FullMatch; + } + return MatchTopologyResult::CompressUnmatch; + } + return MatchTopologyResult::None; } MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index d9d157d02..77c09264a 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -85,6 +85,7 @@ SurfaceParams 
SurfaceParams::CreateForTexture(Core::System& system, } params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); params.num_levels = config.tic.max_mip_level + 1; + params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); params.is_layered = params.IsLayered(); return params; } @@ -109,6 +110,7 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.depth = 1; params.pitch = 0; params.num_levels = 1; + params.emulated_levels = 1; params.is_layered = false; return params; } @@ -139,6 +141,7 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.depth = 1; params.target = SurfaceTarget::Texture2D; params.num_levels = 1; + params.emulated_levels = 1; params.is_layered = false; return params; } @@ -163,6 +166,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; + params.emulated_levels = 1; params.is_layered = params.IsLayered(); return params; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index c3affd621..5fde695b6 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -160,6 +160,19 @@ public: return std::min(t_src_height, t_dst_height); } + u32 MaxPossibleMipmap() const { + const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; + const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; + const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); + if (target != VideoCore::Surface::SurfaceTarget::Texture3D) + return max_mipmap; + return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); + } + + bool IsCompressed() const { + return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; + } + /// Returns the default block width. 
u32 GetDefaultBlockWidth() const { return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); @@ -205,6 +218,7 @@ public: u32 depth; u32 pitch; u32 num_levels; + u32 emulated_levels; VideoCore::Surface::PixelFormat pixel_format; VideoCore::Surface::ComponentType component_type; VideoCore::Surface::SurfaceType type; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d2093e581..69ef7a2bd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -305,7 +305,7 @@ private: * due to topological reasons. **/ RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, const bool untopological) { + const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { if (Settings::values.use_accurate_gpu_emulation) { return RecycleStrategy::Flush; } @@ -320,8 +320,8 @@ private: } } // Untopological decision - if (untopological) { - return RecycleStrategy::Ignore; + if (untopological == MatchTopologyResult::CompressUnmatch) { + return RecycleStrategy::Flush; } return RecycleStrategy::Ignore; } @@ -341,7 +341,7 @@ private: std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, - const bool untopological) { + const MatchTopologyResult untopological) { const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; for (auto surface : overlaps) { Unregister(surface); @@ -502,9 +502,10 @@ private: // matches at certain level we are pretty much done. 
if (l1_cache.count(cache_addr) > 0) { TSurface current_surface = l1_cache[cache_addr]; - if (!current_surface->MatchesTopology(params)) { + auto topological_result = current_surface->MatchesTopology(params); + if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && @@ -534,8 +535,9 @@ private: // we do a topological test to ensure we can find some relationship. If it fails // inmediatly recycle the texture for (auto surface : overlaps) { - if (!surface->MatchesTopology(params)) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + auto topological_result = surface->MatchesTopology(params); + if (topological_result != MatchTopologyResult::FullMatch) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } } @@ -553,7 +555,7 @@ private: return *view; } } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = @@ -576,13 +578,13 @@ private: pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } return {current_surface, *view}; } // The next case is unsafe, so if we r in accurate GPU, just skip it if (Settings::values.use_accurate_gpu_emulation) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); 
+ return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { @@ -599,7 +601,7 @@ private: } } // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, -- cgit v1.2.3 From 60bf761afbb125abd324e4b798d18a1611b5777b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jun 2019 19:12:00 -0400 Subject: texture_cache: Implement Buffer Copy and detect Turing GPUs Image Copies --- src/video_core/texture_cache/texture_cache.h | 40 +++++++++++++++++++++------- 1 file changed, 31 insertions(+), 9 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 69ef7a2bd..e0d0e1f70 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -214,6 +214,13 @@ public: } protected: + // This structure is used for communicating with the backend, on which behaviors + // it supports and what not, to avoid assuming certain things about hardware. + // The backend is RESPONSIBLE for filling this settings on creation. 
+ struct Support { + bool depth_color_image_copies; + } support_info; + TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { @@ -233,6 +240,10 @@ protected: virtual void ImageBlit(TView src_view, TView dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) = 0; + // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture + // and reading it from a sepparate buffer. + virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0; + void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -377,9 +388,14 @@ private: const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); - std::vector bricks = current_surface->BreakDown(params); - for (auto& brick : bricks) { - ImageCopy(current_surface, new_surface, brick); + const auto& cr_params = current_surface->GetSurfaceParams(); + if (!support_info.depth_color_image_copies && cr_params.type != params.type) { + BufferCopy(current_surface, new_surface); + } else { + std::vector bricks = current_surface->BreakDown(params); + for (auto& brick : bricks) { + ImageCopy(current_surface, new_surface, brick); + } } Unregister(current_surface); Register(new_surface); @@ -505,7 +521,8 @@ private: auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + topological_result); } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && @@ -537,7 +554,8 @@ private: for (auto surface : overlaps) { auto 
topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + topological_result); } } @@ -555,7 +573,8 @@ private: return *view; } } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = @@ -578,13 +597,15 @@ private: pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } return {current_surface, *view}; } // The next case is unsafe, so if we r in accurate GPU, just skip it if (Settings::values.use_accurate_gpu_emulation) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { @@ -601,7 +622,8 @@ private: } } // We failed all the tests, recycle the overlaps into a new texture. 
- return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, -- cgit v1.2.3 From 3809041c24a6ebea009923c14fb36aa1031bf188 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jun 2019 22:15:55 -0400 Subject: texture_cache: Optimize GetSurface and use references on functions that don't change a surface. --- src/video_core/texture_cache/texture_cache.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index e0d0e1f70..951168357 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -234,15 +234,15 @@ protected: virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; - virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, + virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, const CopyParams& copy_params) = 0; - virtual void ImageBlit(TView src_view, TView dst_view, + virtual void ImageBlit(TView& src_view, TView& dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) = 0; // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture // and reading it from a sepparate buffer. - virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0; + virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -516,8 +516,9 @@ private: // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. 
- if (l1_cache.count(cache_addr) > 0) { - TSurface current_surface = l1_cache[cache_addr]; + auto iter = l1_cache.find(cache_addr); + if (iter != l1_cache.end()) { + TSurface& current_surface = iter->second; auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; @@ -526,7 +527,6 @@ private: } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && - current_surface->GetGpuAddr() == gpu_addr && (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { if (s_result == MatchStructureResult::FullMatch) { -- cgit v1.2.3 From 9f755218a1359cbd004e6c287f5fead0897c1d11 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jun 2019 23:03:22 -0400 Subject: texture_cache: move some large methods to cpp files --- src/video_core/texture_cache/surface_base.cpp | 103 +++++++++++++++++++++++ src/video_core/texture_cache/surface_base.h | 106 ++---------------------- src/video_core/texture_cache/surface_params.cpp | 33 -------- src/video_core/texture_cache/surface_params.h | 32 +++++-- 4 files changed, 135 insertions(+), 139 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 7e90960f7..8c6edb04f 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -42,6 +42,109 @@ SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) } } +MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const { + const u32 src_bpp{params.GetBytesPerPixel()}; + const u32 dst_bpp{rhs.GetBytesPerPixel()}; + const bool ib1 = params.IsBuffer(); + const bool ib2 = rhs.IsBuffer(); + if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { + const 
bool cb1 = params.IsCompressed(); + const bool cb2 = rhs.IsCompressed(); + if (cb1 == cb2) { + return MatchTopologyResult::FullMatch; + } + return MatchTopologyResult::CompressUnmatch; + } + return MatchTopologyResult::None; +} + +MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const { + // Buffer surface Check + if (params.IsBuffer()) { + const std::size_t wd1 = params.width * params.GetBytesPerPixel(); + const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); + if (wd1 == wd2) { + return MatchStructureResult::FullMatch; + } + return MatchStructureResult::None; + } + + // Linear Surface check + if (!params.is_tiled) { + if (std::tie(params.width, params.height, params.pitch) == + std::tie(rhs.width, rhs.height, rhs.pitch)) { + return MatchStructureResult::FullMatch; + } + return MatchStructureResult::None; + } + + // Tiled Surface check + if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, + params.tile_width_spacing, params.num_levels) == + std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, + rhs.tile_width_spacing, rhs.num_levels)) { + if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { + return MatchStructureResult::FullMatch; + } + const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format, + rhs.pixel_format); + const u32 hs = + SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); + const u32 w1 = params.GetBlockAlignedWidth(); + if (std::tie(w1, params.height) == std::tie(ws, hs)) { + return MatchStructureResult::SemiMatch; + } + } + return MatchStructureResult::None; +} + +std::optional> SurfaceBaseImpl::GetLayerMipmap( + const GPUVAddr candidate_gpu_addr) const { + if (candidate_gpu_addr < gpu_addr) { + return {}; + } + const auto relative_address{static_cast(candidate_gpu_addr - gpu_addr)}; + const auto layer{static_cast(relative_address / layer_size)}; + const GPUVAddr 
mipmap_address = relative_address - layer_size * layer; + const auto mipmap_it = + Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + if (mipmap_it == mipmap_offsets.end()) { + return {}; + } + const auto level{static_cast(std::distance(mipmap_offsets.begin(), mipmap_it))}; + return std::make_pair(layer, level); +} + +std::vector SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const { + const u32 layers{params.depth}; + const u32 mipmaps{params.num_levels}; + std::vector result; + result.reserve(static_cast(layers) * static_cast(mipmaps)); + + for (u32 layer = 0; layer < layers; layer++) { + for (u32 level = 0; level < mipmaps; level++) { + const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); + const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); + result.emplace_back(width, height, layer, level); + } + } + return result; +} + +std::vector SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { + const u32 mipmaps{params.num_levels}; + std::vector result; + result.reserve(mipmaps); + + for (u32 level = 0; level < mipmaps; level++) { + const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); + const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); + const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; + result.emplace_back(width, height, depth, level); + } + return result; +} + void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, u32 level) { const u32 width{params.GetMipWidth(level)}; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 70b5258c9..9d19ecd5f 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -136,83 +136,17 @@ public: return params.target == target; } + 
MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const; + + MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const; + bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { return std::tie(gpu_addr, params.target, params.num_levels) == std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && params.target == SurfaceTarget::Texture2D && params.num_levels == 1; } - MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const { - const u32 src_bpp{params.GetBytesPerPixel()}; - const u32 dst_bpp{rhs.GetBytesPerPixel()}; - const bool ib1 = params.IsBuffer(); - const bool ib2 = rhs.IsBuffer(); - if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { - const bool cb1 = params.IsCompressed(); - const bool cb2 = rhs.IsCompressed(); - if (cb1 == cb2) { - return MatchTopologyResult::FullMatch; - } - return MatchTopologyResult::CompressUnmatch; - } - return MatchTopologyResult::None; - } - - MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { - // Buffer surface Check - if (params.IsBuffer()) { - const std::size_t wd1 = params.width * params.GetBytesPerPixel(); - const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); - if (wd1 == wd2) { - return MatchStructureResult::FullMatch; - } - return MatchStructureResult::None; - } - - // Linear Surface check - if (!params.is_tiled) { - if (std::tie(params.width, params.height, params.pitch) == - std::tie(rhs.width, rhs.height, rhs.pitch)) { - return MatchStructureResult::FullMatch; - } - return MatchStructureResult::None; - } - - // Tiled Surface check - if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, - params.tile_width_spacing, params.num_levels) == - std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing, rhs.num_levels)) { - if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { - return 
MatchStructureResult::FullMatch; - } - const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), - params.pixel_format, rhs.pixel_format); - const u32 hs = - SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); - const u32 w1 = params.GetBlockAlignedWidth(); - if (std::tie(w1, params.height) == std::tie(ws, hs)) { - return MatchStructureResult::SemiMatch; - } - } - return MatchStructureResult::None; - } - - std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const { - if (candidate_gpu_addr < gpu_addr) { - return {}; - } - const auto relative_address{static_cast(candidate_gpu_addr - gpu_addr)}; - const auto layer{static_cast(relative_address / layer_size)}; - const GPUVAddr mipmap_address = relative_address - layer_size * layer; - const auto mipmap_it = - Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); - if (mipmap_it == mipmap_offsets.end()) { - return {}; - } - const auto level{static_cast(std::distance(mipmap_offsets.begin(), mipmap_it))}; - return std::make_pair(layer, level); - } + std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const; std::vector BreakDown(const SurfaceParams& in_params) const { return params.is_layered ? 
BreakDownLayered(in_params) : BreakDownNonLayered(in_params); @@ -241,35 +175,9 @@ private: void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, u32 level); - std::vector BreakDownLayered(const SurfaceParams& in_params) const { - const u32 layers{params.depth}; - const u32 mipmaps{params.num_levels}; - std::vector result; - result.reserve(static_cast(layers) * static_cast(mipmaps)); - - for (u32 layer = 0; layer < layers; layer++) { - for (u32 level = 0; level < mipmaps; level++) { - const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); - const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - result.emplace_back(width, height, layer, level); - } - } - return result; - } - - std::vector BreakDownNonLayered(const SurfaceParams& in_params) const { - const u32 mipmaps{params.num_levels}; - std::vector result; - result.reserve(mipmaps); + std::vector BreakDownLayered(const SurfaceParams& in_params) const; - for (u32 level = 0; level < mipmaps; level++) { - const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); - const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; - result.emplace_back(width, height, depth, level); - } - return result; - } + std::vector BreakDownNonLayered(const SurfaceParams& in_params) const; }; template diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 77c09264a..60a7356bb 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -6,7 +6,6 @@ #include "common/alignment.h" #include "common/bit_util.h" -#include "common/cityhash.h" #include "core/core.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/surface.h" @@ -237,14 +236,6 @@ std::size_t 
SurfaceParams::GetConvertedMipmapOffset(u32 level) const { return offset; } -std::size_t SurfaceParams::GetGuestMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, false, false); -} - -std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); -} - std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { constexpr std::size_t rgb8_bpp = 4ULL; const std::size_t width_t = GetMipWidth(level); @@ -253,10 +244,6 @@ std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { return width_t * height_t * depth_t * rgb8_bpp; } -std::size_t SurfaceParams::GetGuestLayerSize() const { - return GetLayerSize(false, false); -} - std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { std::size_t size = 0; for (u32 level = 0; level < num_levels; ++level) { @@ -269,16 +256,6 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co return size; } -std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { - ASSERT(target != SurfaceTarget::Texture3D); - return GetInnerMipmapMemorySize(level, true, false); -} - -bool SurfaceParams::IsPixelFormatZeta() const { - return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && - pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; -} - std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const { const bool tiled{as_host_size ? false : is_tiled}; @@ -289,16 +266,6 @@ std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size GetMipBlockHeight(level), GetMipBlockDepth(level)); } -std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, - bool uncompressed) const { - return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 
1U : depth); -} - -std::size_t SurfaceParams::Hash() const { - return static_cast( - Common::CityHash64(reinterpret_cast(this), sizeof(*this))); -} - bool SurfaceParams::operator==(const SurfaceParams& rhs) const { return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, height, depth, pitch, num_levels, pixel_format, component_type, type, target) == diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 5fde695b6..c51e174cd 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -8,6 +8,7 @@ #include "common/alignment.h" #include "common/bit_util.h" +#include "common/cityhash.h" #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ -39,7 +40,10 @@ public: static SurfaceParams CreateForFermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& config); - std::size_t Hash() const; + std::size_t Hash() const { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof(*this))); + } bool operator==(const SurfaceParams& rhs) const; @@ -113,18 +117,27 @@ public: std::size_t GetConvertedMipmapOffset(u32 level) const; /// Returns the size in bytes in guest memory of a given mipmap level. - std::size_t GetGuestMipmapSize(u32 level) const; + std::size_t GetGuestMipmapSize(u32 level) const { + return GetInnerMipmapMemorySize(level, false, false); + } /// Returns the size in bytes in host memory (linear) of a given mipmap level. - std::size_t GetHostMipmapSize(u32 level) const; + std::size_t GetHostMipmapSize(u32 level) const { + return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); + } std::size_t GetConvertedMipmapSize(u32 level) const; /// Returns the size of a layer in bytes in guest memory. 
- std::size_t GetGuestLayerSize() const; + std::size_t GetGuestLayerSize() const { + return GetLayerSize(false, false); + } /// Returns the size of a layer in bytes in host memory for a given mipmap level. - std::size_t GetHostLayerSize(u32 level) const; + std::size_t GetHostLayerSize(u32 level) const { + ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); + return GetInnerMipmapMemorySize(level, true, false); + } static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, VideoCore::Surface::PixelFormat pixel_format_to) { @@ -194,7 +207,10 @@ public: } /// Returns true if the pixel format is a depth and/or stencil format. - bool IsPixelFormatZeta() const; + bool IsPixelFormatZeta() const { + return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && + pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; + } SurfaceCompression GetCompressionType() const { return VideoCore::Surface::GetFormatCompressionType(pixel_format); @@ -229,7 +245,9 @@ private: std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; /// Returns the size of all mipmap levels and aligns as needed. - std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const; + std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { + return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 
1U : depth); + } /// Returns the size of a layer std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; -- cgit v1.2.3 From 6f69f06873f666174d3c0306055bc5f097d64afc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 4 Jun 2019 12:12:40 -0400 Subject: texture_cache: Don't Image Copy if component types differ --- src/video_core/texture_cache/texture_cache.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 951168357..d2c27bcef 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -389,7 +389,8 @@ private: const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); const auto& cr_params = current_surface->GetSurfaceParams(); - if (!support_info.depth_color_image_copies && cr_params.type != params.type) { + if (cr_params.type != params.type && (!support_info.depth_color_image_copies || + cr_params.component_type != params.component_type)) { BufferCopy(current_surface, new_surface); } else { std::vector bricks = current_surface->BreakDown(params); -- cgit v1.2.3 From 561ce29c98bf822941061023e1f71a62175318ae Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 10 Jun 2019 10:39:59 -0400 Subject: texture_cache: correct mutex locks --- src/video_core/texture_cache/texture_cache.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d2c27bcef..503bd2b43 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -90,6 +90,7 @@ public: TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { + std::lock_guard 
lock{mutex}; const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { return {}; @@ -99,6 +100,7 @@ public: } TView GetDepthBufferSurface(bool preserve_contents) { + std::lock_guard lock{mutex}; auto& maxwell3d = system.GPU().Maxwell3D(); if (!maxwell3d.dirty_flags.zeta_buffer) { @@ -127,6 +129,7 @@ public: } TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { + std::lock_guard lock{mutex}; ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); auto& maxwell3d = system.GPU().Maxwell3D(); if (!maxwell3d.dirty_flags.color_buffer[index]) { @@ -188,6 +191,7 @@ public: void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, const Tegra::Engines::Fermi2D::Config& copy_config) { + std::lock_guard lock{mutex}; std::pair dst_surface = GetFermiSurface(dst_config); std::pair src_surface = GetFermiSurface(src_config); ImageBlit(src_surface.second, dst_surface.second, copy_config); @@ -245,8 +249,6 @@ protected: virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void Register(TSurface surface) { - std::lock_guard lock{mutex}; - const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); @@ -266,8 +268,6 @@ protected: } void Unregister(TSurface surface) { - std::lock_guard lock{mutex}; - if (guard_cache && surface->IsProtected()) { return; } -- cgit v1.2.3 From b01f9c8a7090fa056ca564593eabcebab946ef41 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 11 Jun 2019 07:20:27 -0400 Subject: texture_cache: eliminate accelerated depth->color/color->depth copies due to driver instability. 
--- src/video_core/texture_cache/texture_cache.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 503bd2b43..c95b1b976 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -218,12 +218,6 @@ public: } protected: - // This structure is used for communicating with the backend, on which behaviors - // it supports and what not, to avoid assuming certain things about hardware. - // The backend is RESPONSIBLE for filling this settings on creation. - struct Support { - bool depth_color_image_copies; - } support_info; TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { @@ -389,8 +383,7 @@ private: const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); const auto& cr_params = current_surface->GetSurfaceParams(); - if (cr_params.type != params.type && (!support_info.depth_color_image_copies || - cr_params.component_type != params.component_type)) { + if (cr_params.type != params.type || (cr_params.component_type != params.component_type)) { BufferCopy(current_surface, new_surface); } else { std::vector bricks = current_surface->BreakDown(params); -- cgit v1.2.3 From a56f687793a0a24a368f0dafd5333daf8cbacecf Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 12 Jun 2019 09:32:26 -0400 Subject: texture_cache: correct texture buffer on surface params --- src/video_core/texture_cache/surface_params.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 60a7356bb..f789da2c4 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ 
b/src/video_core/texture_cache/surface_params.cpp @@ -74,10 +74,17 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. - params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); - params.width = config.tic.Width(); - params.height = config.tic.Height(); - params.depth = config.tic.Depth(); + if (!config.tic.IsBuffer()) { + params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); + params.width = config.tic.Width(); + params.height = config.tic.Height(); + params.depth = config.tic.Depth(); + } else { + params.target = SurfaceTarget::TextureBuffer; + params.width = config.tic.Width(); + params.height = 0; + params.depth = 0; + } if (params.target == SurfaceTarget::TextureCubemap || params.target == SurfaceTarget::TextureCubeArray) { params.depth *= 6; -- cgit v1.2.3 From 2d83553ea7ab2629e7e1a83cc3345c0115d69453 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 09:46:36 -0400 Subject: texture_cache: Implement siblings texture formats. 
--- src/video_core/texture_cache/surface_base.h | 4 +++ src/video_core/texture_cache/texture_cache.h | 39 +++++++++++++++++++--------- 2 files changed, 31 insertions(+), 12 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 9d19ecd5f..58265e9d3 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -132,6 +132,10 @@ public: return params.pixel_format == pixel_format; } + VideoCore::Surface::PixelFormat GetFormat() const { + return params.pixel_format; + } + bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { return params.target == target; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c95b1b976..022416706 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -43,6 +43,8 @@ class RasterizerInterface; namespace VideoCommon { +using VideoCore::Surface::PixelFormat; + using VideoCore::Surface::SurfaceTarget; using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; @@ -96,7 +98,7 @@ public: return {}; } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; - return GetSurface(gpu_addr, params, true).second; + return GetSurface(gpu_addr, params, true, false).second; } TView GetDepthBufferSurface(bool preserve_contents) { @@ -118,7 +120,7 @@ public: system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); + auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); if (depth_buffer.target) depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target = surface_view.first; @@ 
-152,7 +154,7 @@ public: } auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), - preserve_contents); + preserve_contents, true); if (render_targets[index].target) render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target = surface_view.first; @@ -226,6 +228,11 @@ protected: } SetEmptyDepthBuffer(); staging_cache.SetSize(2); + siblings_table[PixelFormat::Z16] = PixelFormat::R16F; + siblings_table[PixelFormat::Z32F] = PixelFormat::R32F; + siblings_table[PixelFormat::Z32FS8] = PixelFormat::RG32F; + siblings_table[PixelFormat::R16F] = PixelFormat::Z16; + siblings_table[PixelFormat::R32F] = PixelFormat::Z32F; } ~TextureCache() = default; @@ -289,7 +296,7 @@ protected: const Tegra::Engines::Fermi2D::Regs::Surface& config) { SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); const GPUVAddr gpu_addr = config.Address(); - return GetSurface(gpu_addr, params, true); + return GetSurface(gpu_addr, params, true, false); } Core::System& system; @@ -406,16 +413,22 @@ private: * @param params, the new surface params which we want to check. 
**/ std::pair ManageStructuralMatch(TSurface current_surface, - const SurfaceParams& params) { + const SurfaceParams& params, bool is_render) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + const bool matches_target = current_surface->MatchTarget(params.target); + auto match_check = ([&]() -> std::pair { + if (matches_target) { + return {current_surface, current_surface->GetMainView()}; + } + return {current_surface, current_surface->EmplaceOverview(params)}; + }); if (is_mirage) { + if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { + return match_check(); + } return RebuildSurface(current_surface, params); } - const bool matches_target = current_surface->MatchTarget(params.target); - if (matches_target) { - return {current_surface, current_surface->GetMainView()}; - } - return {current_surface, current_surface->EmplaceOverview(params)}; + return match_check(); } /** @@ -490,7 +503,7 @@ private: * @param preserve_contents, tells if the new surface should be loaded from meory or left blank. **/ std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, - bool preserve_contents) { + bool preserve_contents, bool is_render) { const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; @@ -524,7 +537,7 @@ private: (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { if (s_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params); + return ManageStructuralMatch(current_surface, params, is_render); } else { return RebuildSurface(current_surface, params); } @@ -724,6 +737,8 @@ private: // Guards the cache for protection conflicts. bool guard_cache{}; + std::unordered_map siblings_table; + // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. 
This fits better for the purpose of this cache as textures are normaly // large in size. -- cgit v1.2.3 From 3dd76432141a5cbc97bed15788984b37e44aa4a5 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 10:39:45 -0400 Subject: texture_cache: Use siblings textures on Rebuild and fix possible error on blitting --- src/video_core/texture_cache/texture_cache.h | 33 +++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 022416706..201c4d42e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -220,7 +220,6 @@ public: } protected: - TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { @@ -233,6 +232,7 @@ protected: siblings_table[PixelFormat::Z32FS8] = PixelFormat::RG32F; siblings_table[PixelFormat::R16F] = PixelFormat::Z16; siblings_table[PixelFormat::R32F] = PixelFormat::Z32F; + siblings_table[PixelFormat::RG32F] = PixelFormat::Z32FS8; } ~TextureCache() = default; @@ -385,15 +385,27 @@ private: * @param current_surface, the registered surface in the cache which we want to convert. * @param params, the new surface params which we'll use to recreate the surface. 
**/ - std::pair RebuildSurface(TSurface current_surface, - const SurfaceParams& params) { + std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params, + bool is_render) { const auto gpu_addr = current_surface->GetGpuAddr(); - TSurface new_surface = GetUncachedSurface(gpu_addr, params); const auto& cr_params = current_surface->GetSurfaceParams(); - if (cr_params.type != params.type || (cr_params.component_type != params.component_type)) { + TSurface new_surface; + if (cr_params.pixel_format != params.pixel_format && !is_render && + siblings_table[cr_params.pixel_format] == params.pixel_format) { + SurfaceParams new_params = params; + new_params.pixel_format = cr_params.pixel_format; + new_params.component_type = cr_params.component_type; + new_params.type = cr_params.type; + new_surface = GetUncachedSurface(gpu_addr, new_params); + } else { + new_surface = GetUncachedSurface(gpu_addr, params); + } + const auto& final_params = new_surface->GetSurfaceParams(); + if (cr_params.type != final_params.type || + (cr_params.component_type != final_params.component_type)) { BufferCopy(current_surface, new_surface); } else { - std::vector bricks = current_surface->BreakDown(params); + std::vector bricks = current_surface->BreakDown(final_params); for (auto& brick : bricks) { ImageCopy(current_surface, new_surface, brick); } @@ -426,7 +438,7 @@ private: if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { return match_check(); } - return RebuildSurface(current_surface, params); + return RebuildSurface(current_surface, params, is_render); } return match_check(); } @@ -539,7 +551,7 @@ private: if (s_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params, is_render); } else { - return RebuildSurface(current_surface, params); + return RebuildSurface(current_surface, params, is_render); } } } @@ -599,7 +611,8 @@ private: new_params.width = wh; new_params.height = hh; 
new_params.pixel_format = params.pixel_format; - std::pair pair = RebuildSurface(current_surface, new_params); + std::pair pair = + RebuildSurface(current_surface, new_params, is_render); std::optional mirage_view = pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) @@ -616,7 +629,7 @@ private: } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { - return RebuildSurface(current_surface, params); + return RebuildSurface(current_surface, params, is_render); } } else { // If there are many overlaps, odds are they are subtextures of the candidate -- cgit v1.2.3 From 7232a1ed16e46715c29d781fb143bdf799090bec Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 16:41:16 -0400 Subject: decoders: correct block calculation --- src/video_core/texture_cache/texture_cache.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 201c4d42e..7a9b4c27d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -335,6 +335,9 @@ private: if (untopological == MatchTopologyResult::CompressUnmatch) { return RecycleStrategy::Flush; } + if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { + return RecycleStrategy::Flush; + } return RecycleStrategy::Ignore; } @@ -372,6 +375,11 @@ private: } return InitializeSurface(gpu_addr, params, preserve_contents); } + case RecycleStrategy::BufferCopy: { + auto new_surface = GetUncachedSurface(gpu_addr, params); + BufferCopy(overlaps[0], new_surface); + return {new_surface, new_surface->GetMainView()}; + } default: { UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); return InitializeSurface(gpu_addr, params, do_load); @@ -520,6 +528,10 @@ private: const auto 
host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + if (gpu_addr == 0x00000001682F0000ULL) { + LOG_CRITICAL(HW_GPU, "Here's the texture!"); + } + // Step 0: guarantee a valid surface if (!cache_addr) { // Return a null surface if it's invalid @@ -566,6 +578,10 @@ private: return InitializeSurface(gpu_addr, params, preserve_contents); } + if (!params.is_tiled) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); + } // Step 3 // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. If it fails -- cgit v1.2.3 From 03d489dcf5dbe13dff1ff788c609f964dd24019c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Jun 2019 15:41:28 -0400 Subject: texture_cache: Initialize all siblings to invalid pixel format. --- src/video_core/texture_cache/texture_cache.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7a9b4c27d..8213f434d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -227,12 +227,18 @@ protected: } SetEmptyDepthBuffer(); staging_cache.SetSize(2); - siblings_table[PixelFormat::Z16] = PixelFormat::R16F; - siblings_table[PixelFormat::Z32F] = PixelFormat::R32F; - siblings_table[PixelFormat::Z32FS8] = PixelFormat::RG32F; - siblings_table[PixelFormat::R16F] = PixelFormat::Z16; - siblings_table[PixelFormat::R32F] = PixelFormat::Z32F; - siblings_table[PixelFormat::RG32F] = PixelFormat::Z32FS8; + auto make_siblings = ([this](PixelFormat a, PixelFormat b) { + siblings_table[a] = b; + siblings_table[b] = a; + }); + const u32 max_formats = static_cast(PixelFormat::Max); + siblings_table.reserve(max_formats); + for (u32 i = 0; i < max_formats; 
i++) { + siblings_table[static_cast(i)] = PixelFormat::Invalid; + } + make_siblings(PixelFormat::Z16, PixelFormat::R16F); + make_siblings(PixelFormat::Z32F, PixelFormat::R32F); + make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); } ~TextureCache() = default; @@ -766,6 +772,9 @@ private: // Guards the cache for protection conflicts. bool guard_cache{}; + // The siblings table is for formats that can inter exchange with one another + // without causing issues. This is only valid when a conflict occurs on a non + // rendering use. std::unordered_map siblings_table; // The internal Cache is different for the Texture Cache. It's based on buckets -- cgit v1.2.3 From 082740d34db0996a0af73d7680c57e1abb31c712 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Jun 2019 16:40:04 -0400 Subject: surface: Correct format S8Z24 --- src/video_core/texture_cache/texture_cache.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8213f434d..a9e61cba1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -534,10 +534,6 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; - if (gpu_addr == 0x00000001682F0000ULL) { - LOG_CRITICAL(HW_GPU, "Here's the texture!"); - } - // Step 0: guarantee a valid surface if (!cache_addr) { // Return a null surface if it's invalid -- cgit v1.2.3 From fed773a86c96fc62f18181a1d3ba410b25c2edee Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Jun 2019 18:40:06 -0400 Subject: texture_cache: Implement Irregular Views in surfaces --- src/video_core/texture_cache/surface_base.cpp | 3 +++ src/video_core/texture_cache/surface_base.h | 25 +++++++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git 
a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 8c6edb04f..97bf9ad7a 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -100,6 +100,9 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) std::optional> SurfaceBaseImpl::GetLayerMipmap( const GPUVAddr candidate_gpu_addr) const { + if (gpu_addr == candidate_gpu_addr) { + return {{0,0}}; + } if (candidate_gpu_addr < gpu_addr) { return {}; } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 58265e9d3..662221adc 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -238,6 +238,26 @@ public: return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } + std::optional EmplaceIrregularView(const SurfaceParams& view_params, + const GPUVAddr view_addr, + const std::size_t candidate_size, const u32 mipmap, + const u32 layer) { + const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)}; + if (!layer_mipmap) { + return {}; + } + const u32 end_layer{layer_mipmap->first}; + const u32 end_mipmap{layer_mipmap->second}; + if (layer != end_layer) { + if (mipmap == 0 && end_mipmap == 0) { + return GetView(ViewParams(view_params.target, layer, end_layer - layer + 1, 0, 1)); + } + return {}; + } else { + return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1)); + } + } + std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, const std::size_t candidate_size) { if (params.target == SurfaceTarget::Texture3D || @@ -252,10 +272,7 @@ public: const u32 layer{layer_mipmap->first}; const u32 mipmap{layer_mipmap->second}; if (GetMipmapSize(mipmap) != candidate_size) { - // TODO: The view may cover many mimaps, this case can still go on. 
- // This edge-case can be safely be ignored since it will just result in worse - // performance. - return {}; + return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer); } return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1)); } -- cgit v1.2.3 From 198a0395bb1b1d19de12560ac146add0705ed00e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 15 Jun 2019 11:08:11 -0400 Subject: texture_cache: Corrections to buffers and shadow formats use. --- src/video_core/texture_cache/surface_params.cpp | 44 +++++++++++++++++++------ 1 file changed, 34 insertions(+), 10 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index f789da2c4..290ba438d 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -16,11 +16,13 @@ namespace VideoCommon { using VideoCore::Surface::ComponentTypeFromDepthFormat; using VideoCore::Surface::ComponentTypeFromRenderTarget; using VideoCore::Surface::ComponentTypeFromTexture; +using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using VideoCore::Surface::PixelFormatFromTextureFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceTargetFromTextureType; +using VideoCore::Surface::SurfaceType; SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { switch (type) { @@ -71,6 +73,24 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.tile_width_spacing = params.is_tiled ? 
(1 << config.tic.tile_width_spacing.Value()) : 1; params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), params.srgb_conversion); + params.type = GetFormatType(params.pixel_format); + if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { + switch (params.pixel_format) { + case PixelFormat::R16F: { + params.pixel_format = PixelFormat::Z16; + break; + } + case PixelFormat::R32F: { + params.pixel_format = PixelFormat::Z32F; + break; + } + default: { + UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", + static_cast(params.pixel_format)); + } + } + params.type = GetFormatType(params.pixel_format); + } params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. @@ -79,20 +99,24 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.width = config.tic.Width(); params.height = config.tic.Height(); params.depth = config.tic.Depth(); + params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); + if (params.target == SurfaceTarget::TextureCubemap || + params.target == SurfaceTarget::TextureCubeArray) { + params.depth *= 6; + } + params.num_levels = config.tic.max_mip_level + 1; + params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); + params.is_layered = params.IsLayered(); } else { params.target = SurfaceTarget::TextureBuffer; params.width = config.tic.Width(); - params.height = 0; - params.depth = 0; + params.pitch = params.width * params.GetBytesPerPixel(); + params.height = 1; + params.depth = 1; + params.num_levels = 1; + params.emulated_levels = 1; + params.is_layered = false; } - if (params.target == SurfaceTarget::TextureCubemap || - params.target == SurfaceTarget::TextureCubeArray) { - params.depth *= 6; - } - params.pitch = params.is_tiled ? 
0 : config.tic.Pitch(); - params.num_levels = config.tic.max_mip_level + 1; - params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); - params.is_layered = params.IsLayered(); return params; } -- cgit v1.2.3 From d7587842eb404a52eb75a12816028f0706821dd0 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 15 Jun 2019 13:22:57 -0400 Subject: texture_cache: Implement texception detection and texture barriers. --- src/video_core/texture_cache/texture_cache.h | 37 ++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 5 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a9e61cba1..353fa4e31 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -70,8 +70,12 @@ public: * `Guard` guarantees that rendertargets don't unregister themselves if the * collide. Protection is currently only done on 3D slices. 
**/ - void Guard(bool new_guard) { - guard_cache = new_guard; + void GuardRenderTargets(bool new_guard) { + guard_render_targets = new_guard; + } + + void GuardSamplers(bool new_guard) { + guard_samplers = new_guard; } void FlushRegion(CacheAddr addr, std::size_t size) { @@ -98,7 +102,25 @@ public: return {}; } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; - return GetSurface(gpu_addr, params, true, false).second; + auto pair = GetSurface(gpu_addr, params, true, false); + if (guard_samplers) { + if (sampled_textures_stack_pointer == sampled_textures_stack.size()) { + sampled_textures_stack.resize(sampled_textures_stack.size() * 2); + } + sampled_textures_stack[sampled_textures_stack_pointer] = pair.first; + sampled_textures_stack_pointer++; + } + return pair.second; + } + + bool TextureBarrier() { + bool must_do = false; + for (u32 i = 0; i < sampled_textures_stack_pointer; i++) { + must_do |= sampled_textures_stack[i]->IsRenderTarget(); + sampled_textures_stack[i] = nullptr; + } + sampled_textures_stack_pointer = 0; + return must_do; } TView GetDepthBufferSurface(bool preserve_contents) { @@ -239,6 +261,7 @@ protected: make_siblings(PixelFormat::Z16, PixelFormat::R16F); make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); + sampled_textures_stack.resize(64); } ~TextureCache() = default; @@ -275,7 +298,7 @@ protected: } void Unregister(TSurface surface) { - if (guard_cache && surface->IsProtected()) { + if (guard_render_targets && surface->IsProtected()) { return; } const GPUVAddr gpu_addr = surface->GetGpuAddr(); @@ -766,7 +789,8 @@ private: u64 ticks{}; // Guards the cache for protection conflicts. - bool guard_cache{}; + bool guard_render_targets{}; + bool guard_samplers{}; // The siblings table is for formats that can inter exchange with one another // without causing issues. 
This is only valid when a conflict occurs on a non @@ -792,6 +816,9 @@ private: render_targets; FramebufferTargetInfo depth_buffer; + std::vector sampled_textures_stack{}; + u32 sampled_textures_stack_pointer{}; + StagingCache staging_cache; std::recursive_mutex mutex; }; -- cgit v1.2.3 From 6acdae0e4c9d0c20f668cd86250b5d5b0dbd70c4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 17 Jun 2019 19:19:47 -0400 Subject: texture_cache: Correct format R16U as sibling --- src/video_core/texture_cache/surface_params.cpp | 1 + src/video_core/texture_cache/texture_cache.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 290ba438d..a670fc1a9 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -76,6 +76,7 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.type = GetFormatType(params.pixel_format); if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { switch (params.pixel_format) { + case PixelFormat::R16U: case PixelFormat::R16F: { params.pixel_format = PixelFormat::Z16; break; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 353fa4e31..78821503e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -258,7 +258,7 @@ protected: for (u32 i = 0; i < max_formats; i++) { siblings_table[static_cast(i)] = PixelFormat::Invalid; } - make_siblings(PixelFormat::Z16, PixelFormat::R16F); + make_siblings(PixelFormat::Z16, PixelFormat::R16U); make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); sampled_textures_stack.resize(64); -- cgit v1.2.3 From 97c8c9f49a3327f8f38dd460951071630c3e26fa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: 
Thu, 20 Jun 2019 14:58:32 -0400 Subject: texture_cache: Eliminate linear textures fallthrough --- src/video_core/texture_cache/texture_cache.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 78821503e..d86ddeb76 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -603,10 +603,6 @@ private: return InitializeSurface(gpu_addr, params, preserve_contents); } - if (!params.is_tiled) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } // Step 3 // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. If it fails -- cgit v1.2.3 From d1812316e1b0f03af2ba10d4fe04be728e72725c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 20 Jun 2019 21:22:20 -0400 Subject: texture_cache: Style and Corrections --- src/video_core/texture_cache/surface_base.cpp | 2 +- src/video_core/texture_cache/surface_base.h | 3 ++- src/video_core/texture_cache/texture_cache.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 97bf9ad7a..051014c6a 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -101,7 +101,7 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) std::optional> SurfaceBaseImpl::GetLayerMipmap( const GPUVAddr candidate_gpu_addr) const { if (gpu_addr == candidate_gpu_addr) { - return {{0,0}}; + return {{0, 0}}; } if (candidate_gpu_addr < gpu_addr) { return {}; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 
662221adc..252b18538 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -254,7 +254,8 @@ public: } return {}; } else { - return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1)); + return GetView( + ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1)); } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d86ddeb76..b720856f2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -18,6 +18,7 @@ #include "common/common_types.h" #include "common/math_util.h" #include "core/memory.h" +#include "core/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" -- cgit v1.2.3 From 0837290992e0873f270cd032d2d0e5b91b643267 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:08:52 -0300 Subject: texture_cache/surface_base: Address feedback --- src/video_core/texture_cache/surface_base.cpp | 4 ++++ src/video_core/texture_cache/surface_base.h | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 051014c6a..7a0fdb19b 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -19,6 +19,10 @@ using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; using VideoCore::Surface::SurfaceCompression; +StagingCache::StagingCache() = default; + +StagingCache::~StagingCache() = default; + SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) : params{params}, mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ diff --git 
a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 252b18538..d632630ce 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -40,13 +40,17 @@ enum class MatchTopologyResult : u32 { class StagingCache { public: - StagingCache() {} - ~StagingCache() = default; + explicit StagingCache(); + ~StagingCache(); std::vector& GetBuffer(std::size_t index) { return staging_buffer[index]; } + const std::vector& GetBuffer(std::size_t index) const { + return staging_buffer[index]; + } + void SetSize(std::size_t size) { staging_buffer.resize(size); } -- cgit v1.2.3 From 34841a41c308aa1336f71fbce3006302452302d1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:09:56 -0300 Subject: texture_cache/surface_view: Address feedback --- src/video_core/texture_cache/surface_view.h | 1 - 1 file changed, 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index 1ef4509ce..04ca5639b 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -44,7 +44,6 @@ struct ViewParams { class ViewBase { public: ViewBase(const ViewParams& params) : params{params} {} - ~ViewBase() = default; const ViewParams& GetViewParams() const { return params; -- cgit v1.2.3 From 7565389700a5741460a118d1fcc5e14fccb4b413 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:15:57 -0300 Subject: texture_cache: Include "core/core.h" --- src/video_core/texture_cache/texture_cache.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b720856f2..a91b2a220 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -17,6 
+17,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/math_util.h" +#include "core/core.h" #include "core/memory.h" #include "core/settings.h" #include "video_core/engines/fermi_2d.h" @@ -30,10 +31,6 @@ #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" -namespace Core { -class System; -} - namespace Tegra::Texture { struct FullTextureInfo; } -- cgit v1.2.3 From 58c8a44e7aa18f768db39a36870d8b279257e1d8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 17:26:00 -0400 Subject: texture_cache: Query MemoryManager from the system --- src/video_core/texture_cache/texture_cache.h | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a91b2a220..1516fcea3 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -52,10 +52,6 @@ class TextureCache { using IntervalType = typename IntervalMap::interval_type; public: - void InitMemoryMananger(Tegra::MemoryManager& memory_manager) { - this->memory_manager = &memory_manager; - } - void InvalidateRegion(CacheAddr addr, std::size_t size) { std::lock_guard lock{mutex}; @@ -278,15 +274,16 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); + const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); - const std::optional cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); + const std::optional cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); if (!cache_ptr || !cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); return; } 
- bool continuouty = memory_manager->IsBlockContinuous(gpu_addr, size); + bool continuouty = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); surface->MarkAsContinuous(continuouty); surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); @@ -552,7 +549,7 @@ private: std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { - const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; + const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; // Step 0: guarantee a valid surface @@ -693,7 +690,7 @@ private: void LoadSurface(const TSurface& surface) { staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(*memory_manager, staging_cache); + surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache); surface->UploadTexture(staging_cache.GetBuffer(0)); surface->MarkAsModified(false, Tick()); } @@ -704,7 +701,7 @@ private: } staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); surface->DownloadTexture(staging_cache.GetBuffer(0)); - surface->FlushBuffer(*memory_manager, staging_cache); + surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache); surface->MarkAsModified(false, Tick()); } @@ -778,7 +775,6 @@ private: }; VideoCore::RasterizerInterface& rasterizer; - Tegra::MemoryManager* memory_manager; u64 ticks{}; -- cgit v1.2.3 From fb234560b060e528d66a77815330766e5aa88594 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 17:42:50 -0400 Subject: copy_params: use constexpr for constructor --- src/video_core/texture_cache/copy_params.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h index 8cf010142..9c21a0649 100644 --- a/src/video_core/texture_cache/copy_params.h +++ 
b/src/video_core/texture_cache/copy_params.h @@ -9,13 +9,14 @@ namespace VideoCommon { struct CopyParams { - CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, u32 dest_z, - u32 source_level, u32 dest_level, u32 width, u32 height, u32 depth) + constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, + u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height, + u32 depth) : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x}, dest_y{dest_y}, dest_z{dest_z}, source_level{source_level}, dest_level{dest_level}, width{width}, height{height}, depth{depth} {} - CopyParams(u32 width, u32 height, u32 depth, u32 level) + constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level) : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level}, dest_level{level}, width{width}, height{height}, depth{depth} {} -- cgit v1.2.3 From c0abc7124d6ecd17f9da5ee5b3de9cb3dbf3ce1f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 18:03:25 -0400 Subject: surface_params: Corrections, asserts and documentation. 
--- src/video_core/texture_cache/surface_params.cpp | 4 +- src/video_core/texture_cache/surface_params.h | 97 ++++++++++++++----------- 2 files changed, 58 insertions(+), 43 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index a670fc1a9..340ed2ca0 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -269,11 +269,11 @@ std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const { } std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { - constexpr std::size_t rgb8_bpp = 4ULL; + constexpr std::size_t rgba8_bpp = 4ULL; const std::size_t width_t = GetMipWidth(level); const std::size_t height_t = GetMipHeight(level); const std::size_t depth_t = is_layered ? depth : GetMipDepth(level); - return width_t * height_t * depth_t * rgb8_bpp; + return width_t * height_t * depth_t * rgba8_bpp; } std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index c51e174cd..4dfb882f0 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -95,25 +95,21 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; + /// returns the best possible row/pitch alignment for the surface. u32 GetRowAlignment(u32 level) const { const u32 bpp = GetCompressionType() == SurfaceCompression::Converted ? 
4 : GetBytesPerPixel(); return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); } - // Helper used for out of class size calculations - static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, - const u32 block_depth) { - return Common::AlignBits(out_size, - Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); - } - /// Returns the offset in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapLevelOffset(u32 level) const; /// Returns the offset in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapLevelOffset(u32 level) const; + /// Returns the offset in bytes in host memory (linear) of a given mipmap level + // for a texture that is converted in host gpu. std::size_t GetConvertedMipmapOffset(u32 level) const; /// Returns the size in bytes in guest memory of a given mipmap level. @@ -139,40 +135,7 @@ public: return GetInnerMipmapMemorySize(level, true, false); } - static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, - VideoCore::Surface::PixelFormat pixel_format_to) { - const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); - const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); - return (width * bw2 + bw1 - 1) / bw1; - } - - static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, - VideoCore::Surface::PixelFormat pixel_format_to) { - const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); - const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); - return (height * bh2 + bh1 - 1) / bh1; - } - - // this finds the maximun possible width between 2 2D layers of different formats - static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, - const u32 src_level, const u32 dst_level) { - const u32 bw1 = src_params.GetDefaultBlockWidth(); - const u32 bw2 = dst_params.GetDefaultBlockWidth(); 
- const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; - const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; - return std::min(t_src_width, t_dst_width); - } - - // this finds the maximun possible height between 2 2D layers of different formats - static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, - const u32 src_level, const u32 dst_level) { - const u32 bh1 = src_params.GetDefaultBlockHeight(); - const u32 bh2 = dst_params.GetDefaultBlockHeight(); - const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; - const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; - return std::min(t_src_height, t_dst_height); - } - + /// Returns the max possible mipmap that the texture can have in host gpu u32 MaxPossibleMipmap() const { const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; @@ -182,6 +145,7 @@ public: return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); } + /// Returns if the guest surface is a compressed surface. bool IsCompressed() const { return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; } @@ -212,16 +176,67 @@ public: pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; } + /// Returns how the compression should be handled for this texture. Values + /// are: None(no compression), Compressed(texture is compressed), + /// Converted(texture is converted before upload/ after download), + /// Rearranged(texture is swizzled before upload/after download). SurfaceCompression GetCompressionType() const { return VideoCore::Surface::GetFormatCompressionType(pixel_format); } + /// Returns is the surface is a TextureBuffer type of surface. bool IsBuffer() const { return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; } + /// Returns the debug name of the texture for use in graphic debuggers. 
std::string TargetName() const; + // Helper used for out of class size calculations + static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, + const u32 block_depth) { + return Common::AlignBits(out_size, + Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); + } + + /// Converts a width from a type of surface into another. This helps represent the + /// equivalent value between compressed/non-compressed textures. + static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, + VideoCore::Surface::PixelFormat pixel_format_to) { + const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); + const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); + return (width * bw2 + bw1 - 1) / bw1; + } + + /// Converts a height from a type of surface into another. This helps represent the + /// equivalent value between compressed/non-compressed textures. + static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, + VideoCore::Surface::PixelFormat pixel_format_to) { + const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); + const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); + return (height * bh2 + bh1 - 1) / bh1; + } + + // Finds the maximun possible width between 2 2D layers of different formats + static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, + const u32 src_level, const u32 dst_level) { + const u32 bw1 = src_params.GetDefaultBlockWidth(); + const u32 bw2 = dst_params.GetDefaultBlockWidth(); + const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; + const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; + return std::min(t_src_width, t_dst_width); + } + + // Finds the maximun possible height between 2 2D layers of different formats + static u32 IntersectHeight(const SurfaceParams& src_params, const 
SurfaceParams& dst_params, + const u32 src_level, const u32 dst_level) { + const u32 bh1 = src_params.GetDefaultBlockHeight(); + const u32 bh2 = dst_params.GetDefaultBlockHeight(); + const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; + const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; + return std::min(t_src_height, t_dst_height); + } + bool is_tiled; bool srgb_conversion; bool is_layered; -- cgit v1.2.3 From 88bc39374fd7cffd2864229ae60bdab3aebb37ea Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 18:36:19 -0400 Subject: texture_cache: Corrections, documentation and asserts --- src/video_core/texture_cache/texture_cache.h | 84 ++++++++++++++-------------- 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1516fcea3..fb6ca41ff 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -60,10 +60,10 @@ public: } } - /** + /*** * `Guard` guarantees that rendertargets don't unregister themselves if the * collide. Protection is currently only done on 3D slices. 
- **/ + ***/ void GuardRenderTargets(bool new_guard) { guard_render_targets = new_guard; } @@ -191,19 +191,21 @@ public: } void SetEmptyDepthBuffer() { - if (depth_buffer.target != nullptr) { - depth_buffer.target->MarkAsRenderTarget(false); - depth_buffer.target = nullptr; - depth_buffer.view = nullptr; + if (depth_buffer.target == nullptr) { + return; } + depth_buffer.target->MarkAsRenderTarget(false); + depth_buffer.target = nullptr; + depth_buffer.view = nullptr; } void SetEmptyColorBuffer(std::size_t index) { - if (render_targets[index].target != nullptr) { - render_targets[index].target->MarkAsRenderTarget(false); - render_targets[index].target = nullptr; - render_targets[index].view = nullptr; + if (render_targets[index].target == nullptr) { + return; } + render_targets[index].target->MarkAsRenderTarget(false); + render_targets[index].target = nullptr; + render_targets[index].view = nullptr; } void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, @@ -283,8 +285,8 @@ protected: gpu_addr); return; } - bool continuouty = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); - surface->MarkAsContinuous(continuouty); + const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); + surface->MarkAsContinuous(continuous); surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); RegisterInnerCache(surface); @@ -381,8 +383,8 @@ private: const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, const MatchTopologyResult untopological) { - const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; - for (auto surface : overlaps) { + const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation; + for (auto& surface : overlaps) { Unregister(surface); } switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { @@ -394,7 +396,7 @@ private: [](const TSurface& a, const TSurface& b) -> bool { return a->GetModificationTick() 
< b->GetModificationTick(); }); - for (auto surface : overlaps) { + for (auto& surface : overlaps) { FlushSurface(surface); } return InitializeSurface(gpu_addr, params, preserve_contents); @@ -460,19 +462,19 @@ private: const SurfaceParams& params, bool is_render) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); const bool matches_target = current_surface->MatchTarget(params.target); - auto match_check = ([&]() -> std::pair { + const auto match_check = ([&]() -> std::pair { if (matches_target) { return {current_surface, current_surface->GetMainView()}; } return {current_surface, current_surface->EmplaceOverview(params)}; }); - if (is_mirage) { - if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { - return match_check(); - } - return RebuildSurface(current_surface, params, is_render); + if (!is_mirage) { + return match_check(); + } + if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { + return match_check(); } - return match_check(); + return RebuildSurface(current_surface, params, is_render); } /** @@ -493,7 +495,7 @@ private: bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); u32 passed_tests = 0; - for (auto surface : overlaps) { + for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); if (src_params.is_layered || src_params.num_levels > 1) { // We send this cases to recycle as they are more complex to handle @@ -504,8 +506,7 @@ private: if (!mipmap_layer) { continue; } - const u32 layer{mipmap_layer->first}; - const u32 mipmap{mipmap_layer->second}; + const auto [layer, mipmap] = *mipmap_layer; if (new_surface->GetMipmapSize(mipmap) != candidate_size) { continue; } @@ -519,7 +520,7 @@ private: } if (passed_tests == 0) { return {}; - // In Accurate GPU all test should pass, else we recycle + // In Accurate GPU all tests should pass, else we recycle } else if 
(Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { return {}; } @@ -548,7 +549,6 @@ private: **/ std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; @@ -570,17 +570,17 @@ private: auto iter = l1_cache.find(cache_addr); if (iter != l1_cache.end()) { TSurface& current_surface = iter->second; - auto topological_result = current_surface->MatchesTopology(params); + const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } - MatchStructureResult s_result = current_surface->MatchesStructure(params); - if (s_result != MatchStructureResult::None && + const auto struct_result = current_surface->MatchesStructure(params); + if (struct_result != MatchStructureResult::None && (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { - if (s_result == MatchStructureResult::FullMatch) { + if (struct_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params, is_render); } else { return RebuildSurface(current_surface, params, is_render); @@ -602,8 +602,8 @@ private: // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. 
If it fails // inmediatly recycle the texture - for (auto surface : overlaps) { - auto topological_result = surface->MatchesTopology(params); + for (const auto& surface : overlaps) { + const auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); @@ -620,7 +620,7 @@ private: if (current_surface->GetGpuAddr() == gpu_addr) { std::optional> view = TryReconstructSurface(overlaps, params, gpu_addr); - if (view.has_value()) { + if (view) { return *view; } } @@ -630,7 +630,7 @@ private: // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = current_surface->EmplaceView(params, gpu_addr, candidate_size); - if (view.has_value()) { + if (view) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { // On a mirage view, we need to recreate the surface under this new view @@ -669,7 +669,7 @@ private: // using the overlaps. If a single overlap fails, this will fail. 
std::optional> view = TryReconstructSurface(overlaps, params, gpu_addr); - if (view.has_value()) { + if (view) { return *view; } } @@ -738,16 +738,16 @@ private: std::vector surfaces; while (start <= end) { std::vector& list = registry[start]; - for (auto& s : list) { - if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { - s->MarkAsPicked(true); - surfaces.push_back(s); + for (auto& surface : list) { + if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { + surface->MarkAsPicked(true); + surfaces.push_back(surface); } } start++; } - for (auto& s : surfaces) { - s->MarkAsPicked(false); + for (auto& surface : surfaces) { + surface->MarkAsPicked(false); } return surfaces; } -- cgit v1.2.3 From 223ca8075399463e51d4afea1adb0c5b6fba8588 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 19:35:08 -0400 Subject: texture_cache: Correct variable naming. --- src/video_core/texture_cache/texture_cache.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fb6ca41ff..b5b0e91ef 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -225,9 +225,9 @@ public: } const CacheAddr page = cache_addr >> registry_page_bits; std::vector& list = registry[page]; - for (auto& s : list) { - if (s->GetCacheAddr() == cache_addr) { - return s; + for (auto& surface : list) { + if (surface->GetCacheAddr() == cache_addr) { + return surface; } } return nullptr; -- cgit v1.2.3 From 3f3c3ca5f96fd5742524703f20b531338fa2e5f7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 17:29:39 -0300 Subject: texture_cache: Address feedback --- src/video_core/texture_cache/surface_base.h | 2 +- src/video_core/texture_cache/texture_cache.h | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 
'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index d632630ce..eaed6545d 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -294,8 +294,8 @@ protected: virtual TView CreateView(const ViewParams& view_key) = 0; - std::unordered_map views; TView main_view; + std::unordered_map views; private: TView GetView(const ViewParams& key) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b5b0e91ef..9436a5ff2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -79,10 +79,9 @@ public: if (surfaces.empty()) { return; } - std::sort(surfaces.begin(), surfaces.end(), - [](const TSurface& a, const TSurface& b) -> bool { - return a->GetModificationTick() < b->GetModificationTick(); - }); + std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { + return a->GetModificationTick() < b->GetModificationTick(); + }); for (const auto& surface : surfaces) { FlushSurface(surface); } @@ -181,13 +180,15 @@ public: } void MarkColorBufferInUse(std::size_t index) { - if (render_targets[index].target) - render_targets[index].target->MarkAsModified(true, Tick()); + if (auto& render_target = render_targets[index].target) { + render_target->MarkAsModified(true, Tick()); + } } void MarkDepthBufferInUse() { - if (depth_buffer.target) + if (depth_buffer.target) { depth_buffer.target->MarkAsModified(true, Tick()); + } } void SetEmptyDepthBuffer() { @@ -245,11 +246,11 @@ protected: } SetEmptyDepthBuffer(); staging_cache.SetSize(2); - auto make_siblings = ([this](PixelFormat a, PixelFormat b) { + const auto make_siblings = [this](PixelFormat a, PixelFormat b) { siblings_table[a] = b; siblings_table[b] = a; - }); - const u32 max_formats = static_cast(PixelFormat::Max); + }; + const auto max_formats = 
static_cast(PixelFormat::Max); siblings_table.reserve(max_formats); for (u32 i = 0; i < max_formats; i++) { siblings_table[static_cast(i)] = PixelFormat::Invalid; -- cgit v1.2.3 From dd9ace502bfd2239ceddad8c5c41baf0e10e2144 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 18:54:13 -0300 Subject: texture_cache: Use std::array for siblings_table --- src/video_core/texture_cache/texture_cache.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9436a5ff2..9fcf87744 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,6 +4,8 @@ #pragma once +#include +#include #include #include #include @@ -244,20 +246,19 @@ protected: for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { SetEmptyColorBuffer(i); } + SetEmptyDepthBuffer(); staging_cache.SetSize(2); + const auto make_siblings = [this](PixelFormat a, PixelFormat b) { - siblings_table[a] = b; - siblings_table[b] = a; + siblings_table[static_cast(a)] = b; + siblings_table[static_cast(b)] = a; }; - const auto max_formats = static_cast(PixelFormat::Max); - siblings_table.reserve(max_formats); - for (u32 i = 0; i < max_formats; i++) { - siblings_table[static_cast(i)] = PixelFormat::Invalid; - } + std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); make_siblings(PixelFormat::Z16, PixelFormat::R16U); make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); + sampled_textures_stack.resize(64); } @@ -426,7 +427,8 @@ private: const auto& cr_params = current_surface->GetSurfaceParams(); TSurface new_surface; if (cr_params.pixel_format != params.pixel_format && !is_render && - siblings_table[cr_params.pixel_format] == params.pixel_format) { + 
siblings_table[static_cast(cr_params.pixel_format)] == + params.pixel_format) { SurfaceParams new_params = params; new_params.pixel_format = cr_params.pixel_format; new_params.component_type = cr_params.component_type; @@ -472,7 +474,8 @@ private: if (!is_mirage) { return match_check(); } - if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { + if (!is_render && siblings_table[static_cast(current_surface->GetFormat())] == + params.pixel_format) { return match_check(); } return RebuildSurface(current_surface, params, is_render); @@ -786,7 +789,7 @@ private: // The siblings table is for formats that can inter exchange with one another // without causing issues. This is only valid when a conflict occurs on a non // rendering use. - std::unordered_map siblings_table; + std::array(PixelFormat::Max)> siblings_table; // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly -- cgit v1.2.3 From f6f1a8f26a302dc33df635625c490f0d65880059 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 19:52:37 -0300 Subject: texture_cache: Style changes --- src/video_core/texture_cache/surface_params.h | 9 +++------ src/video_core/texture_cache/texture_cache.h | 3 +-- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 4dfb882f0..358d6757c 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -95,7 +95,7 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; - /// returns the best possible row/pitch alignment for the surface. + /// Returns the best possible row/pitch alignment for the surface. 
u32 GetRowAlignment(u32 level) const { const u32 bpp = GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); @@ -109,7 +109,7 @@ public: std::size_t GetHostMipmapLevelOffset(u32 level) const; /// Returns the offset in bytes in host memory (linear) of a given mipmap level - // for a texture that is converted in host gpu. + /// for a texture that is converted in host gpu. std::size_t GetConvertedMipmapOffset(u32 level) const; /// Returns the size in bytes in guest memory of a given mipmap level. @@ -176,10 +176,7 @@ public: pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; } - /// Returns how the compression should be handled for this texture. Values - /// are: None(no compression), Compressed(texture is compressed), - /// Converted(texture is converted before upload/ after download), - /// Rearranged(texture is swizzled before upload/after download). + /// Returns how the compression should be handled for this texture. SurfaceCompression GetCompressionType() const { return VideoCore::Surface::GetFormatCompressionType(pixel_format); } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9fcf87744..3df3e17dd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -571,8 +571,7 @@ private: // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. 
- auto iter = l1_cache.find(cache_addr); - if (iter != l1_cache.end()) { + if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { -- cgit v1.2.3 From 8eae66907e043e6e26d78cfc4b5cde7ea93a4f77 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 20:10:31 -0300 Subject: texture_cache: Use std::vector reservation for sampled_textures --- src/video_core/texture_cache/texture_cache.h | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3df3e17dd..8edae3d97 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -97,25 +97,19 @@ public: return {}; } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; - auto pair = GetSurface(gpu_addr, params, true, false); + const auto [surface, view] = GetSurface(gpu_addr, params, true, false); if (guard_samplers) { - if (sampled_textures_stack_pointer == sampled_textures_stack.size()) { - sampled_textures_stack.resize(sampled_textures_stack.size() * 2); - } - sampled_textures_stack[sampled_textures_stack_pointer] = pair.first; - sampled_textures_stack_pointer++; + sampled_textures.push_back(surface); } - return pair.second; + return view; } bool TextureBarrier() { - bool must_do = false; - for (u32 i = 0; i < sampled_textures_stack_pointer; i++) { - must_do |= sampled_textures_stack[i]->IsRenderTarget(); - sampled_textures_stack[i] = nullptr; - } - sampled_textures_stack_pointer = 0; - return must_do; + const bool any_rt = + std::any_of(sampled_textures.begin(), sampled_textures.end(), + [](const auto& surface) { return surface->IsRenderTarget(); }); + 
sampled_textures.clear(); + return any_rt; } TView GetDepthBufferSurface(bool preserve_contents) { @@ -259,7 +253,7 @@ protected: make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); - sampled_textures_stack.resize(64); + sampled_textures.reserve(64); } ~TextureCache() = default; @@ -809,8 +803,7 @@ private: render_targets; FramebufferTargetInfo depth_buffer; - std::vector sampled_textures_stack{}; - u32 sampled_textures_stack_pointer{}; + std::vector sampled_textures; StagingCache staging_cache; std::recursive_mutex mutex; -- cgit v1.2.3 From 6e1db6b7038329a9716763c8bdf14cc5b578fec1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 20:47:46 -0300 Subject: texture_cache: Pack sibling queries inside a method --- src/video_core/texture_cache/texture_cache.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8edae3d97..c9e72531a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -421,8 +421,7 @@ private: const auto& cr_params = current_surface->GetSurfaceParams(); TSurface new_surface; if (cr_params.pixel_format != params.pixel_format && !is_render && - siblings_table[static_cast(cr_params.pixel_format)] == - params.pixel_format) { + GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { SurfaceParams new_params = params; new_params.pixel_format = cr_params.pixel_format; new_params.component_type = cr_params.component_type; @@ -459,17 +458,16 @@ private: const SurfaceParams& params, bool is_render) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); const bool matches_target = current_surface->MatchTarget(params.target); - const auto match_check = ([&]() -> std::pair { + const auto match_check = [&]() -> std::pair { if 
(matches_target) { return {current_surface, current_surface->GetMainView()}; } return {current_surface, current_surface->EmplaceOverview(params)}; - }); + }; if (!is_mirage) { return match_check(); } - if (!is_render && siblings_table[static_cast(current_surface->GetFormat())] == - params.pixel_format) { + if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { return match_check(); } return RebuildSurface(current_surface, params, is_render); @@ -766,6 +764,10 @@ private: return {}; } + constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { + return siblings_table[static_cast(format)]; + } + struct FramebufferTargetInfo { TSurface target; TView view; -- cgit v1.2.3 From 30b176f92b67ec7a9b1ce08cf89d50abd125f8a8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 4 Jul 2019 19:38:19 -0400 Subject: texture_cache: Correct Texture Buffer Uploading --- src/video_core/texture_cache/surface_params.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 340ed2ca0..9c56e2b4f 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -310,6 +310,8 @@ std::string SurfaceParams::TargetName() const { switch (target) { case SurfaceTarget::Texture1D: return "1D"; + case SurfaceTarget::TextureBuffer: + return "TexBuffer"; case SurfaceTarget::Texture2D: return "2D"; case SurfaceTarget::Texture3D: -- cgit v1.2.3 From 3b9d89839dc62e9e63a3cbe9636cf85276babdfb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 4 Jul 2019 21:10:59 -0400 Subject: texture_cache: Address Feedback --- src/video_core/texture_cache/surface_base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h 
index eaed6545d..8ba386a8a 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -9,7 +9,7 @@ #include #include "common/assert.h" -#include "common/common_funcs.h" +#include "common/binary_find.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/morton.h" @@ -191,7 +191,7 @@ private: template class SurfaceBase : public SurfaceBaseImpl { public: - virtual void UploadTexture(std::vector& staging_buffer) = 0; + virtual void UploadTexture(const std::vector& staging_buffer) = 0; virtual void DownloadTexture(std::vector& staging_buffer) = 0; -- cgit v1.2.3