From 1b4503c571d3b961efe74fa7e35d5fa14941ec09 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Apr 2019 16:35:54 -0300 Subject: texture_cache: Split texture cache into different files --- src/video_core/texture_cache/surface_base.cpp | 118 ++++++ src/video_core/texture_cache/surface_base.h | 172 +++++++++ src/video_core/texture_cache/surface_params.cpp | 412 +++++++++++++++++++++ src/video_core/texture_cache/surface_params.h | 229 ++++++++++++ src/video_core/texture_cache/surface_view.cpp | 23 ++ src/video_core/texture_cache/surface_view.h | 35 ++ src/video_core/texture_cache/texture_cache.h | 282 ++++++++++++++ .../texture_cache/texture_cache_contextless.h | 93 +++++ 8 files changed, 1364 insertions(+) create mode 100644 src/video_core/texture_cache/surface_base.cpp create mode 100644 src/video_core/texture_cache/surface_base.h create mode 100644 src/video_core/texture_cache/surface_params.cpp create mode 100644 src/video_core/texture_cache/surface_params.h create mode 100644 src/video_core/texture_cache/surface_view.cpp create mode 100644 src/video_core/texture_cache/surface_view.h create mode 100644 src/video_core/texture_cache/texture_cache.h create mode 100644 src/video_core/texture_cache/texture_cache_contextless.h (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp new file mode 100644 index 000000000..8680485b4 --- /dev/null +++ b/src/video_core/texture_cache/surface_base.cpp @@ -0,0 +1,118 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include <climits>
+#include <cstring>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/morton.h"
+#include "video_core/texture_cache/surface_base.h"
+#include "video_core/texture_cache/surface_params.h"
+#include "video_core/textures/convert.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::ConvertFromGuestToHost;
+using VideoCore::MortonSwizzleMode;
+
+namespace {
+/// Runs MortonSwizzle for one mipmap level between guest memory and the staging buffer.
+/// Layered surfaces are processed one layer at a time (depth 1), advancing the guest pointer by
+/// the guest layer size and the host pointer by the host size of that level.
+void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer,
+                 u32 level) {
+    const u32 width{params.GetMipWidth(level)};
+    const u32 height{params.GetMipHeight(level)};
+    const u32 block_height{params.GetMipBlockHeight(level)};
+    const u32 block_depth{params.GetMipBlockDepth(level)};
+
+    std::size_t guest_offset{params.GetGuestMipmapLevelOffset(level)};
+    if (params.IsLayered()) {
+        std::size_t host_offset{0};
+        const std::size_t guest_stride = params.GetGuestLayerSize();
+        const std::size_t host_stride = params.GetHostLayerSize(level);
+        for (u32 layer = 0; layer < params.GetNumLayers(); layer++) {
+            MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth,
+                          1, params.GetTileWidthSpacing(), buffer + host_offset,
+                          memory + guest_offset);
+            guest_offset += guest_stride;
+            host_offset += host_stride;
+        }
+    } else {
+        MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth,
+                      params.GetMipDepth(level), params.GetTileWidthSpacing(), buffer,
+                      memory + guest_offset);
+    }
+}
+} // Anonymous namespace
+
+SurfaceBaseImpl::SurfaceBaseImpl(const SurfaceParams& params) : params{params} {
+    staging_buffer.resize(params.GetHostSizeInBytes());
+}
+
+SurfaceBaseImpl::~SurfaceBaseImpl() = default;
+
+/// Fills the staging buffer from guest memory (host_ptr) and converts it to the host format.
+void SurfaceBaseImpl::LoadBuffer() {
+    if (params.IsTiled()) {
+        ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}",
+                   params.GetBlockWidth(), static_cast<u32>(params.GetTarget()));
+        for (u32 level = 0; level < params.GetNumLevels(); ++level) {
+            u8* const buffer{GetStagingBufferLevelData(level)};
+            SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, buffer, level);
+        }
+    } else {
+        ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented");
+        const u32 bpp{GetFormatBpp(params.GetPixelFormat()) / CHAR_BIT};
+        const u32 block_width{params.GetDefaultBlockWidth()};
+        const u32 block_height{params.GetDefaultBlockHeight()};
+        const u32 width{(params.GetWidth() + block_width - 1) / block_width};
+        const u32 height{(params.GetHeight() + block_height - 1) / block_height};
+        const u32 copy_size{width * bpp};
+        if (params.GetPitch() == copy_size) {
+            // Rows are tightly packed, copy everything in one go.
+            std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes());
+        } else {
+            // Rows are padded in guest memory, copy them one by one dropping the padding.
+            const u8* start{host_ptr};
+            u8* write_to{staging_buffer.data()};
+            for (u32 h = height; h > 0; --h) {
+                std::memcpy(write_to, start, copy_size);
+                start += params.GetPitch();
+                write_to += copy_size;
+            }
+        }
+    }
+
+    for (u32 level = 0; level < params.GetNumLevels(); ++level) {
+        ConvertFromGuestToHost(GetStagingBufferLevelData(level), params.GetPixelFormat(),
+                               params.GetMipWidth(level), params.GetMipHeight(level),
+                               params.GetMipDepth(level), true, true);
+    }
+}
+
+/// Writes the staging buffer back to guest memory. Only tiled surfaces are implemented.
+void SurfaceBaseImpl::FlushBuffer() {
+    if (params.IsTiled()) {
+        ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}",
+                   params.GetBlockWidth());
+        for (u32 level = 0; level < params.GetNumLevels(); ++level) {
+            u8* const buffer = GetStagingBufferLevelData(level);
+            SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, buffer, level);
+        }
+    } else {
+        UNIMPLEMENTED();
+        /*
+        ASSERT(params.GetTarget() == SurfaceTarget::Texture2D);
+        ASSERT(params.GetNumLevels() == 1);
+
+        const u32 bpp{params.GetFormatBpp() / 8};
+        const u32 copy_size{params.GetWidth() * bpp};
+        if (params.GetPitch() == copy_size) {
+            std::memcpy(host_ptr, staging_buffer.data(), GetSizeInBytes());
+        } else {
+            u8* start{host_ptr};
+            const u8* read_to{staging_buffer.data()};
+            for (u32 h = params.GetHeight(); h > 0; --h) {
+                std::memcpy(start, read_to, copy_size);
+                start += params.GetPitch();
+                read_to += copy_size;
+            }
+        }
+        */
+    }
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
new file mode 100644
index 000000000..d0142a9e6
--- /dev/null
+++ b/src/video_core/texture_cache/surface_base.h
@@ -0,0 +1,172 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+#include "video_core/texture_cache/surface_params.h"
+#include "video_core/texture_cache/surface_view.h"
+
+namespace VideoCommon {
+
+/// Backend independent part of a cached surface: its parameters, the guest addresses it is
+/// registered at and the staging buffer used to move data between guest and host.
+class SurfaceBaseImpl {
+public:
+    void LoadBuffer();
+
+    void FlushBuffer();
+
+    GPUVAddr GetGpuAddr() const {
+        ASSERT(is_registered);
+        return gpu_addr;
+    }
+
+    VAddr GetCpuAddr() const {
+        ASSERT(is_registered);
+        return cpu_addr;
+    }
+
+    u8* GetHostPtr() const {
+        ASSERT(is_registered);
+        return host_ptr;
+    }
+
+    CacheAddr GetCacheAddr() const {
+        ASSERT(is_registered);
+        return cache_addr;
+    }
+
+    const SurfaceParams& GetSurfaceParams() const {
+        return params;
+    }
+
+    /// Binds the surface to its guest addresses. Must not be called while already registered.
+    void Register(GPUVAddr gpu_addr_, VAddr cpu_addr_, u8* host_ptr_) {
+        ASSERT(!is_registered);
+        is_registered = true;
+        gpu_addr = gpu_addr_;
+        cpu_addr = cpu_addr_;
+        host_ptr = host_ptr_;
+        cache_addr = ToCacheAddr(host_ptr_);
+        DecorateSurfaceName();
+    }
+
+    void Unregister() {
+        ASSERT(is_registered);
+        is_registered = false;
+    }
+
+    bool IsRegistered() const {
+        return is_registered;
+    }
+
+    std::size_t GetSizeInBytes() const {
+        return params.GetGuestSizeInBytes();
+    }
+
+    /// Returns a pointer into the staging buffer where the given mipmap level starts.
+    u8* GetStagingBufferLevelData(u32 level) {
+        return staging_buffer.data() + params.GetHostMipmapLevelOffset(level);
+    }
+
+protected:
+    explicit SurfaceBaseImpl(const SurfaceParams& params);
+    ~SurfaceBaseImpl(); // non-virtual is intended
+
+    virtual void DecorateSurfaceName() = 0;
+
+    const SurfaceParams params;
+
+private:
+    GPUVAddr gpu_addr{};
+    VAddr cpu_addr{};
+    u8* host_ptr{};
+    CacheAddr cache_addr{};
+    bool is_registered{};
+
+    std::vector<u8> staging_buffer;
+};
+
+// NOTE(review): the template parameter list was lost when this patch was extracted; the names
+// are the ones used in the class body, their order is reconstructed - confirm against callers.
+template <typename TTextureCache, typename TView, typename TExecutionContext>
+class SurfaceBase : public SurfaceBaseImpl {
+    static_assert(std::is_trivially_copyable_v<TExecutionContext>);
+
+public:
+    virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0;
+
+    virtual TExecutionContext DownloadTexture(TExecutionContext exctx) = 0;
+
+    /// Returns a view of this surface starting at view_addr, or nullptr when the address does not
+    /// map to a layer/level of this surface or view_params is not compatible with it.
+    TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) {
+        if (view_addr < GetGpuAddr() || !params.IsFamiliar(view_params)) {
+            // It can't be a view if it's in a prior address.
+            return {};
+        }
+
+        const auto relative_offset{static_cast<u64>(view_addr - GetGpuAddr())};
+        const auto it{view_offset_map.find(relative_offset)};
+        if (it == view_offset_map.end()) {
+            // Couldn't find an aligned view.
+            return {};
+        }
+        const auto [layer, level] = it->second;
+
+        if (!params.IsViewValid(view_params, layer, level)) {
+            return {};
+        }
+
+        return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels());
+    }
+
+    /// Updates the modified flag; when set, also records the current texture cache tick.
+    void MarkAsModified(bool is_modified_) {
+        is_modified = is_modified_;
+        if (is_modified_) {
+            modification_tick = texture_cache.Tick();
+        }
+    }
+
+    /// Same as TryGetView but asserts that a view was found.
+    TView* GetView(GPUVAddr view_addr, const SurfaceParams& view_params) {
+        TView* view{TryGetView(view_addr, view_params)};
+        ASSERT(view != nullptr);
+        return view;
+    }
+
+    bool IsModified() const {
+        return is_modified;
+    }
+
+    u64 GetModificationTick() const {
+        return modification_tick;
+    }
+
+protected:
+    explicit SurfaceBase(TTextureCache& texture_cache, const SurfaceParams& params)
+        : SurfaceBaseImpl{params}, texture_cache{texture_cache},
+          view_offset_map{params.CreateViewOffsetMap()} {}
+
+    ~SurfaceBase() = default;
+
+    virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0;
+
+private:
+    /// Returns the cached view for the given range, creating it on first use.
+    TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) {
+        const ViewKey key{base_layer, num_layers, base_level, num_levels};
+        const auto [entry, is_cache_miss] = views.try_emplace(key);
+        auto& view{entry->second};
+        if (is_cache_miss) {
+            view = CreateView(key);
+        }
+        return view.get();
+    }
+
+    TTextureCache& texture_cache;
+    const std::map<u64, std::pair<u32, u32>> view_offset_map;
+
+    std::unordered_map<ViewKey, std::unique_ptr<TView>> views;
+
+    bool is_modified{};
+    u64 modification_tick{};
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
new file mode 100644
index 000000000..d1f8c53d5
--- /dev/null
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -0,0 +1,412 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <climits>
+#include <map>
+#include <tuple>
+#include <utility>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/cityhash.h"
+#include "core/core.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/surface_params.h"
+#include "video_core/textures/decoders.h"
+
+namespace VideoCommon {
+
+using VideoCore::Surface::ComponentTypeFromDepthFormat;
+using VideoCore::Surface::ComponentTypeFromRenderTarget;
+using VideoCore::Surface::ComponentTypeFromTexture;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+using VideoCore::Surface::PixelFormatFromTextureFormat;
+using VideoCore::Surface::SurfaceTarget;
+using VideoCore::Surface::SurfaceTargetFromTextureType;
+
+namespace {
+/// Returns mip_size untouched when uncompressed is set, otherwise the number of tiles (at least
+/// one) needed to cover mip_size.
+constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
+    return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
+}
+} // Anonymous namespace
+
+SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
+                                              const Tegra::Texture::FullTextureInfo& config) {
+    // Value-initialize: Hash() reads the raw bytes of the object, so every field and the padding
+    // between them has to start from a known state.
+    SurfaceParams params{};
+    params.is_tiled = config.tic.IsTiled();
+    params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
+    params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0;
+    params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0;
+    params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0;
+    params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
+    params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
+                                                       params.srgb_conversion);
+    params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
+    params.type = GetFormatType(params.pixel_format);
+    params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
+    params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
+    params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
+    params.depth = config.tic.Depth();
+    if (params.target == SurfaceTarget::TextureCubemap ||
+        params.target == SurfaceTarget::TextureCubeArray) {
+        params.depth *= 6;
+    }
+    params.pitch = params.is_tiled ? 0 : config.tic.Pitch();
+    params.unaligned_height = config.tic.Height();
+    params.num_levels = config.tic.max_mip_level + 1;
+
+    params.CalculateCachedValues();
+    return params;
+}
+
+SurfaceParams SurfaceParams::CreateForDepthBuffer(
+    Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
+    u32 block_width, u32 block_height, u32 block_depth,
+    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
+    // Value-initialize so that pitch (never assigned here) and padding are zero, see Hash().
+    SurfaceParams params{};
+    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+    params.srgb_conversion = false;
+    params.block_width = 1 << std::min(block_width, 5U);
+    params.block_height = 1 << std::min(block_height, 5U);
+    params.block_depth = 1 << std::min(block_depth, 5U);
+    params.tile_width_spacing = 1;
+    params.pixel_format = PixelFormatFromDepthFormat(format);
+    params.component_type = ComponentTypeFromDepthFormat(format);
+    params.type = GetFormatType(params.pixel_format);
+    params.width = zeta_width;
+    params.height = zeta_height;
+    params.unaligned_height = zeta_height;
+    params.target = SurfaceTarget::Texture2D;
+    params.depth = 1;
+    params.num_levels = 1;
+
+    params.CalculateCachedValues();
+    return params;
+}
+
+SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
+    const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
+    // Value-initialize so that pitch (only assigned for linear targets) and padding are zero,
+    // see Hash().
+    SurfaceParams params{};
+    params.is_tiled =
+        config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
+                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
+    params.block_width = 1 << config.memory_layout.block_width;
+    params.block_height = 1 << config.memory_layout.block_height;
+    params.block_depth = 1 << config.memory_layout.block_depth;
+    params.tile_width_spacing = 1;
+    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+    params.component_type = ComponentTypeFromRenderTarget(config.format);
+    params.type = GetFormatType(params.pixel_format);
+    if (params.is_tiled) {
+        params.width = config.width;
+    } else {
+        // For linear targets the width register is used as the pitch in bytes.
+        const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
+        params.pitch = config.width;
+        params.width = params.pitch / bpp;
+    }
+    params.height = config.height;
+    params.depth = 1;
+    params.unaligned_height = config.height;
+    params.target = SurfaceTarget::Texture2D;
+    params.num_levels = 1;
+
+    params.CalculateCachedValues();
+    return params;
+}
+
+SurfaceParams SurfaceParams::CreateForFermiCopySurface(
+    const Tegra::Engines::Fermi2D::Regs::Surface& config) {
+    SurfaceParams params{};
+    params.is_tiled = !config.linear;
+    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
+                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
+    params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0;
+    params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0;
+    params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0;
+    params.tile_width_spacing = 1;
+    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+    params.component_type = ComponentTypeFromRenderTarget(config.format);
+    params.type = GetFormatType(params.pixel_format);
+    params.width = config.width;
+    params.height = config.height;
+    params.unaligned_height = config.height;
+    // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
+    params.target = SurfaceTarget::Texture2D;
+    params.depth = 1;
+    params.num_levels = 1;
+
+    params.CalculateCachedValues();
+    return params;
+}
+
+u32 SurfaceParams::GetMipWidth(u32 level) const {
+    return std::max(1U, width >> level);
+}
+
+u32 SurfaceParams::GetMipHeight(u32 level) const {
+    return std::max(1U, height >> level);
+}
+
+u32 SurfaceParams::GetMipDepth(u32 level) const {
+    // On layered targets depth holds the layer count, which does not shrink with the level.
+    return IsLayered() ? depth : std::max(1U, depth >> level);
+}
+
+bool SurfaceParams::IsLayered() const {
+    switch (target) {
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray:
+        return true;
+    default:
+        return false;
+    }
+}
+
+u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
+    // Auto block resizing algorithm from:
+    // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+    if (level == 0) {
+        return this->block_height;
+    }
+
+    const u32 height{GetMipHeight(level)};
+    const u32 default_block_height{GetDefaultBlockHeight()};
+    const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height};
+    u32 block_height = 16;
+    while (block_height > 1 && blocks_in_y <= block_height * 4) {
+        block_height >>= 1;
+    }
+    return block_height;
+}
+
+u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
+    if (level == 0) {
+        return this->block_depth;
+    }
+    if (IsLayered()) {
+        return 1;
+    }
+
+    const u32 depth{GetMipDepth(level)};
+    u32 block_depth = 32;
+    while (block_depth > 1 && depth * 2 <= block_depth) {
+        block_depth >>= 1;
+    }
+
+    if (block_depth == 32 && GetMipBlockHeight(level) >= 4) {
+        return 16;
+    }
+
+    return block_depth;
+}
+
+std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
+    std::size_t offset = 0;
+    for (u32 i = 0; i < level; i++) {
+        offset += GetInnerMipmapMemorySize(i, false, false);
+    }
+    return offset;
+}
+
+std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
+    std::size_t offset = 0;
+    for (u32 i = 0; i < level; i++) {
+        offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
+    }
+    return offset;
+}
+
+std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const {
+    return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers();
+}
+
+std::size_t SurfaceParams::GetGuestLayerSize() const {
+    return GetLayerSize(false, false);
+}
+
+std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
+    std::size_t size = 0;
+    for (u32 level = 0; level < num_levels; ++level) {
+        size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
+    }
+    if (is_tiled && (IsLayered() || target == SurfaceTarget::Texture3D)) {
+        return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth);
+    }
+    return size;
+}
+
+std::size_t SurfaceParams::GetHostLayerSize(u32 level) const {
+    ASSERT(target != SurfaceTarget::Texture3D);
+    return GetInnerMipmapMemorySize(level, true, false);
+}
+
+u32 SurfaceParams::GetDefaultBlockWidth() const {
+    return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
+}
+
+u32 SurfaceParams::GetDefaultBlockHeight() const {
+    return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
+}
+
+u32 SurfaceParams::GetBitsPerPixel() const {
+    return VideoCore::Surface::GetFormatBpp(pixel_format);
+}
+
+u32 SurfaceParams::GetBytesPerPixel() const {
+    return VideoCore::Surface::GetBytesPerPixel(pixel_format);
+}
+
+bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const {
+    if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) !=
+        std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format,
+                 view_params.component_type, view_params.type)) {
+        return false;
+    }
+
+    const SurfaceTarget view_target{view_params.target};
+    if (view_target == target) {
+        return true;
+    }
+
+    // Different targets: only a layered surface can be viewed through a "smaller" target.
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D:
+        return false;
+    case SurfaceTarget::Texture1DArray:
+        return view_target == SurfaceTarget::Texture1D;
+    case SurfaceTarget::Texture2DArray:
+        return view_target == SurfaceTarget::Texture2D;
+    case SurfaceTarget::TextureCubemap:
+        return view_target == SurfaceTarget::Texture2D ||
+               view_target == SurfaceTarget::Texture2DArray;
+    case SurfaceTarget::TextureCubeArray:
+        return view_target == SurfaceTarget::Texture2D ||
+               view_target == SurfaceTarget::Texture2DArray ||
+               view_target == SurfaceTarget::TextureCubemap;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast<u32>(target));
+        return false;
+    }
+}
+
+bool SurfaceParams::IsPixelFormatZeta() const {
+    return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
+           pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
+}
+
+void SurfaceParams::CalculateCachedValues() {
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D:
+        num_layers = 1;
+        break;
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray:
+        num_layers = depth;
+        break;
+    default:
+        UNREACHABLE();
+    }
+
+    guest_size_in_bytes = GetInnerMemorySize(false, false, false);
+
+    if (IsPixelFormatASTC(pixel_format)) {
+        // ASTC is uncompressed in software, in emulated as RGBA8
+        host_size_in_bytes = static_cast<std::size_t>(width) * static_cast<std::size_t>(height) *
+                             static_cast<std::size_t>(depth) * 4ULL;
+    } else {
+        host_size_in_bytes = GetInnerMemorySize(true, false, false);
+    }
+}
+
+std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
+                                                    bool uncompressed) const {
+    // Host sizes are always computed as linear memory.
+    const bool tiled{as_host_size ? false : is_tiled};
+    const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
+    const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
+    const u32 depth{target == SurfaceTarget::Texture3D ? GetMipDepth(level) : 1U};
+    return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth,
+                                         GetMipBlockHeight(level), GetMipBlockDepth(level));
+}
+
+std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only,
+                                              bool uncompressed) const {
+    return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : num_layers);
+}
+
+std::map<u64, std::pair<u32, u32>> SurfaceParams::CreateViewOffsetMap() const {
+    std::map<u64, std::pair<u32, u32>> view_offset_map;
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D: {
+        // TODO(Rodrigo): Add layer iterations for 3D textures
+        constexpr u32 layer = 0;
+        for (u32 level = 0; level < num_levels; ++level) {
+            const std::size_t offset{GetGuestMipmapLevelOffset(level)};
+            view_offset_map.insert({offset, {layer, level}});
+        }
+        break;
+    }
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray: {
+        const std::size_t layer_size{GetGuestLayerSize()};
+        for (u32 level = 0; level < num_levels; ++level) {
+            const std::size_t level_offset{GetGuestMipmapLevelOffset(level)};
+            for (u32 layer = 0; layer < num_layers; ++layer) {
+                const auto layer_offset{static_cast<std::size_t>(layer_size * layer)};
+                const std::size_t offset{level_offset + layer_offset};
+                view_offset_map.insert({offset, {layer, level}});
+            }
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast<u32>(target));
+    }
+    return view_offset_map;
+}
+
+bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const {
+    return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) &&
+           IsInBounds(view_params, layer, level);
+}
+
+bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const {
+    return view_params.width == GetMipWidth(level) && view_params.height == GetMipHeight(level);
+}
+
+bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const {
+    if (view_params.target != SurfaceTarget::Texture3D) {
+        return true;
+    }
+    return view_params.depth == GetMipDepth(level);
+}
+
+bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const {
+    return layer + view_params.num_layers <= num_layers &&
+           level + view_params.num_levels <= num_levels;
+}
+
+std::size_t HasheableSurfaceParams::Hash() const {
+    // Hashes the raw object representation. The Create* factories value-initialize the object
+    // they fill in so that unassigned fields and padding bytes start out as zero.
+    return static_cast<std::size_t>(
+        Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
+}
+
+bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const {
+    // srgb_conversion is part of the bytes covered by Hash(), so it has to take part in the
+    // comparison as well; otherwise two equal objects could produce different hashes.
+    return std::tie(is_tiled, srgb_conversion, block_width, block_height, block_depth,
+                    tile_width_spacing, width, height, depth, pitch, unaligned_height, num_levels,
+                    pixel_format, component_type, type, target) ==
+           std::tie(rhs.is_tiled, rhs.srgb_conversion, rhs.block_width, rhs.block_height,
+                    rhs.block_depth, rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth,
+                    rhs.pitch, rhs.unaligned_height, rhs.num_levels, rhs.pixel_format,
+                    rhs.component_type, rhs.type, rhs.target);
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
new file mode 100644
index 000000000..77dc0ba66
--- /dev/null
+++ b/src/video_core/texture_cache/surface_params.h
@@ -0,0 +1,229 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <functional>
+#include <map>
+#include <utility>
+
+#include "common/common_types.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/surface.h"
+
+namespace VideoCommon {
+
+/// Fields of a surface description that take part in hashing and equality comparison.
+class HasheableSurfaceParams {
+public:
+    std::size_t Hash() const;
+
+    bool operator==(const HasheableSurfaceParams& rhs) const;
+
+    bool operator!=(const HasheableSurfaceParams& rhs) const {
+        return !operator==(rhs);
+    }
+
+protected:
+    // Avoid creation outside of a managed environment.
+    HasheableSurfaceParams() = default;
+
+    bool is_tiled;
+    bool srgb_conversion;
+    u32 block_width;
+    u32 block_height;
+    u32 block_depth;
+    u32 tile_width_spacing;
+    u32 width;
+    u32 height;
+    u32 depth;
+    u32 pitch;
+    u32 unaligned_height;
+    u32 num_levels;
+    VideoCore::Surface::PixelFormat pixel_format;
+    VideoCore::Surface::ComponentType component_type;
+    VideoCore::Surface::SurfaceType type;
+    VideoCore::Surface::SurfaceTarget target;
+};
+
+class SurfaceParams final : public HasheableSurfaceParams {
+public:
+    /// Creates SurfaceCachedParams from a texture configuration.
+    static SurfaceParams CreateForTexture(Core::System& system,
+                                          const Tegra::Texture::FullTextureInfo& config);
+
+    /// Creates SurfaceCachedParams for a depth buffer configuration.
+    static SurfaceParams CreateForDepthBuffer(
+        Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
+        u32 block_width, u32 block_height, u32 block_depth,
+        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
+
+    /// Creates SurfaceCachedParams from a framebuffer configuration.
+    static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
+
+    /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
+    static SurfaceParams CreateForFermiCopySurface(
+        const Tegra::Engines::Fermi2D::Regs::Surface& config);
+
+    bool IsTiled() const {
+        return is_tiled;
+    }
+
+    bool GetSrgbConversion() const {
+        return srgb_conversion;
+    }
+
+    u32 GetBlockWidth() const {
+        return block_width;
+    }
+
+    u32 GetTileWidthSpacing() const {
+        return tile_width_spacing;
+    }
+
+    u32 GetWidth() const {
+        return width;
+    }
+
+    u32 GetHeight() const {
+        return height;
+    }
+
+    u32 GetDepth() const {
+        return depth;
+    }
+
+    u32 GetPitch() const {
+        return pitch;
+    }
+
+    u32 GetNumLevels() const {
+        return num_levels;
+    }
+
+    VideoCore::Surface::PixelFormat GetPixelFormat() const {
+        return pixel_format;
+    }
+
+    VideoCore::Surface::ComponentType GetComponentType() const {
+        return component_type;
+    }
+
+    VideoCore::Surface::SurfaceTarget GetTarget() const {
+        return target;
+    }
+
+    VideoCore::Surface::SurfaceType GetType() const {
+        return type;
+    }
+
+    std::size_t GetGuestSizeInBytes() const {
+        return guest_size_in_bytes;
+    }
+
+    std::size_t GetHostSizeInBytes() const {
+        return host_size_in_bytes;
+    }
+
+    u32 GetNumLayers() const {
+        return num_layers;
+    }
+
+    /// Returns the width of a given mipmap level.
+    u32 GetMipWidth(u32 level) const;
+
+    /// Returns the height of a given mipmap level.
+    u32 GetMipHeight(u32 level) const;
+
+    /// Returns the depth of a given mipmap level.
+    u32 GetMipDepth(u32 level) const;
+
+    /// Returns true if these parameters are from a layered surface.
+    bool IsLayered() const;
+
+    /// Returns the block height of a given mipmap level.
+    u32 GetMipBlockHeight(u32 level) const;
+
+    /// Returns the block depth of a given mipmap level.
+    u32 GetMipBlockDepth(u32 level) const;
+
+    /// Returns the offset in bytes in guest memory of a given mipmap level.
+    std::size_t GetGuestMipmapLevelOffset(u32 level) const;
+
+    /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
+    std::size_t GetHostMipmapLevelOffset(u32 level) const;
+
+    /// Returns the size in bytes in host memory (linear) of a given mipmap level.
+    std::size_t GetHostMipmapSize(u32 level) const;
+
+    /// Returns the size of a layer in bytes in guest memory.
+    std::size_t GetGuestLayerSize() const;
+
+    /// Returns the size of a layer in bytes in host memory for a given mipmap level.
+    std::size_t GetHostLayerSize(u32 level) const;
+
+    /// Returns the default block width.
+    u32 GetDefaultBlockWidth() const;
+
+    /// Returns the default block height.
+    u32 GetDefaultBlockHeight() const;
+
+    /// Returns the bits per pixel.
+    u32 GetBitsPerPixel() const;
+
+    /// Returns the bytes per pixel.
+    u32 GetBytesPerPixel() const;
+
+    /// Returns true if another surface can be familiar with this. This is a loosely defined term
+    /// that reflects the possibility of these two surface parameters potentially being part of a
+    /// bigger superset.
+    bool IsFamiliar(const SurfaceParams& view_params) const;
+
+    /// Returns true if the pixel format is a depth and/or stencil format.
+    bool IsPixelFormatZeta() const;
+
+    /// Creates a map that redirects an address difference to a layer and mipmap level.
+    std::map<u64, std::pair<u32, u32>> CreateViewOffsetMap() const;
+
+    /// Returns true if the passed surface view parameters is equal or a valid subset of this.
+    bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const;
+
+private:
+    /// Calculates values that can be deduced from HasheableSurfaceParams.
+    void CalculateCachedValues();
+
+    /// Returns the size of a given mipmap level inside a layer.
+    std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const;
+
+    /// Returns the size of all mipmap levels and aligns as needed.
+    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const;
+
+    /// Returns the size of a layer
+    std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
+
+    /// Returns true if the passed view width and height match the size of this params in a given
+    /// mipmap level.
+    bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const;
+
+    /// Returns true if the passed view depth match the size of this params in a given mipmap level.
+    bool IsDepthValid(const SurfaceParams& view_params, u32 level) const;
+
+    /// Returns true if the passed view layers and mipmap levels are in bounds.
+    bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const;
+
+    std::size_t guest_size_in_bytes;
+    std::size_t host_size_in_bytes;
+    u32 num_layers;
+};
+
+} // namespace VideoCommon
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::SurfaceParams> {
+    std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
+        return k.Hash();
+    }
+};
+
+} // namespace std
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp
new file mode 100644
index 000000000..5f4cdbb1c
--- /dev/null
+++ b/src/video_core/texture_cache/surface_view.cpp
@@ -0,0 +1,23 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+ +#include + +#include "common/common_types.h" +#include "video_core/texture_cache/surface_view.h" + +namespace VideoCommon { + +std::size_t ViewKey::Hash() const { + return static_cast(base_layer) ^ static_cast(num_layers << 16) ^ + (static_cast(base_level) << 32) ^ + (static_cast(num_levels) << 48); +} + +bool ViewKey::operator==(const ViewKey& rhs) const { + return std::tie(base_layer, num_layers, base_level, num_levels) == + std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels); +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h new file mode 100644 index 000000000..e73d8f6ae --- /dev/null +++ b/src/video_core/texture_cache/surface_view.h @@ -0,0 +1,35 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" + +namespace VideoCommon { + +struct ViewKey { + std::size_t Hash() const; + + bool operator==(const ViewKey& rhs) const; + + u32 base_layer{}; + u32 num_layers{}; + u32 base_level{}; + u32 num_levels{}; +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash { + std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h new file mode 100644 index 000000000..fb43fa65e --- /dev/null +++ b/src/video_core/texture_cache/texture_cache.h @@ -0,0 +1,282 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "core/memory.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/surface_base.h" +#include "video_core/texture_cache/surface_params.h" +#include "video_core/texture_cache/surface_view.h" + +namespace Core { +class System; +} + +namespace Tegra::Texture { +struct FullTextureInfo; +} + +namespace VideoCore { +class RasterizerInterface; +} + +namespace VideoCommon { + +template +class TextureCache { + static_assert(std::is_trivially_copyable_v); + + using ResultType = std::tuple; + using IntervalMap = boost::icl::interval_map>>; + using IntervalType = typename IntervalMap::interval_type; + +public: + void InvalidateRegion(CacheAddr addr, std::size_t size) { + for (const auto& surface : GetSurfacesInRegion(addr, size)) { + if (!surface->IsRegistered()) { + // Skip duplicates + continue; + } + Unregister(surface); + } + } + + ResultType GetTextureSurface(TExecutionContext exctx, + const Tegra::Texture::FullTextureInfo& config) { + const auto gpu_addr{config.tic.Address()}; + if (!gpu_addr) { + return {{}, exctx}; + } + const auto params{SurfaceParams::CreateForTexture(system, config)}; + return GetSurfaceView(exctx, gpu_addr, params, true); + } + + ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { + const auto& regs{system.GPU().Maxwell3D().regs}; + const auto gpu_addr{regs.zeta.Address()}; + if (!gpu_addr || !regs.zeta_enable) { + return {{}, exctx}; + } + const auto depth_params{SurfaceParams::CreateForDepthBuffer( + system, regs.zeta_width, regs.zeta_height, regs.zeta.format, + regs.zeta.memory_layout.block_width, 
regs.zeta.memory_layout.block_height, + regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; + return GetSurfaceView(exctx, gpu_addr, depth_params, preserve_contents); + } + + ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, + bool preserve_contents) { + ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); + + const auto& regs{system.GPU().Maxwell3D().regs}; + if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || + regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { + return {{}, exctx}; + } + + auto& memory_manager{system.GPU().MemoryManager()}; + const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; + const auto gpu_addr{config.Address() + + config.base_layer * config.layer_stride * sizeof(u32)}; + if (!gpu_addr) { + return {{}, exctx}; + } + + return GetSurfaceView(exctx, gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + preserve_contents); + } + + ResultType GetFermiSurface(TExecutionContext exctx, + const Tegra::Engines::Fermi2D::Regs::Surface& config) { + return GetSurfaceView(exctx, config.Address(), + SurfaceParams::CreateForFermiCopySurface(config), true); + } + + std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { + const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; + return it != registered_surfaces.end() ? 
*it->second.begin() : nullptr; + } + + u64 Tick() { + return ++ticks; + } + +protected: + TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : system{system}, rasterizer{rasterizer} {} + + ~TextureCache() = default; + + virtual ResultType TryFastGetSurfaceView( + TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector>& overlaps) = 0; + + virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; + + void Register(std::shared_ptr surface, GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr) { + surface->Register(gpu_addr, cpu_addr, host_ptr); + registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); + rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); + } + + void Unregister(std::shared_ptr surface) { + registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); + rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); + surface->Unregister(); + } + + std::shared_ptr GetUncachedSurface(const SurfaceParams& params) { + if (const auto surface = TryGetReservedSurface(params); surface) + return surface; + // No reserved surface available, create a new one and reserve it + auto new_surface{CreateSurface(params)}; + ReserveSurface(params, new_surface); + return new_surface; + } + + Core::System& system; + +private: + ResultType GetSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, + const SurfaceParams& params, bool preserve_contents) { + auto& memory_manager{system.GPU().MemoryManager()}; + const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; + DEBUG_ASSERT(cpu_addr); + + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + const auto cache_addr{ToCacheAddr(host_ptr)}; + auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; + if (overlaps.empty()) { + return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, 
host_ptr, params, preserve_contents); + } + + if (overlaps.size() == 1) { + if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { + return {view, exctx}; + } + } + + TView* fast_view; + std::tie(fast_view, exctx) = TryFastGetSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, + params, preserve_contents, overlaps); + + if (!fast_view) { + std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { + return lhs->GetModificationTick() < rhs->GetModificationTick(); + }); + } + + for (const auto& surface : overlaps) { + if (!fast_view) { + // Flush even when we don't care about the contents, to preserve memory not + // written by the new surface. + exctx = FlushSurface(exctx, surface); + } + Unregister(surface); + } + + if (fast_view) { + return {fast_view, exctx}; + } + + return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + } + + ResultType LoadSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr, const SurfaceParams& params, bool preserve_contents) { + const auto new_surface{GetUncachedSurface(params)}; + Register(new_surface, gpu_addr, cpu_addr, host_ptr); + if (preserve_contents) { + exctx = LoadSurface(exctx, new_surface); + } + return {new_surface->GetView(gpu_addr, params), exctx}; + } + + TExecutionContext LoadSurface(TExecutionContext exctx, + const std::shared_ptr& surface) { + surface->LoadBuffer(); + exctx = surface->UploadTexture(exctx); + surface->MarkAsModified(false); + return exctx; + } + + TExecutionContext FlushSurface(TExecutionContext exctx, + const std::shared_ptr& surface) { + if (!surface->IsModified()) { + return exctx; + } + exctx = surface->DownloadTexture(exctx); + surface->FlushBuffer(); + return exctx; + } + + std::vector> GetSurfacesInRegion(CacheAddr cache_addr, + std::size_t size) const { + if (size == 0) { + return {}; + } + const IntervalType interval{cache_addr, cache_addr + size}; + + std::vector> surfaces; + for (auto& pair : 
boost::make_iterator_range(registered_surfaces.equal_range(interval))) { + surfaces.push_back(*pair.second.begin()); + } + return surfaces; + } + + void ReserveSurface(const SurfaceParams& params, std::shared_ptr surface) { + surface_reserve[params].push_back(std::move(surface)); + } + + std::shared_ptr TryGetReservedSurface(const SurfaceParams& params) { + auto search{surface_reserve.find(params)}; + if (search == surface_reserve.end()) { + return {}; + } + for (auto& surface : search->second) { + if (!surface->IsRegistered()) { + return surface; + } + } + return {}; + } + + IntervalType GetSurfaceInterval(std::shared_ptr surface) const { + return IntervalType::right_open(surface->GetCacheAddr(), + surface->GetCacheAddr() + surface->GetSizeInBytes()); + } + + VideoCore::RasterizerInterface& rasterizer; + + u64 ticks{}; + + IntervalMap registered_surfaces; + + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have + /// previously been used. This is to prevent surfaces from being constantly created and + /// destroyed when used with different surface parameters. + std::unordered_map>> surface_reserve; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_contextless.h b/src/video_core/texture_cache/texture_cache_contextless.h new file mode 100644 index 000000000..cd35a9fd4 --- /dev/null +++ b/src/video_core/texture_cache/texture_cache_contextless.h @@ -0,0 +1,93 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "video_core/texture_cache/texture_cache.h" + +namespace VideoCommon { + +struct DummyExecutionContext {}; + +template +class TextureCacheContextless : protected TextureCache { + using Base = TextureCache; + +public: + void InvalidateRegion(CacheAddr addr, std::size_t size) { + Base::InvalidateRegion(addr, size); + } + + TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { + return RemoveContext(Base::GetTextureSurface({}, config)); + } + + TView* GetDepthBufferSurface(bool preserve_contents) { + return RemoveContext(Base::GetDepthBufferSurface({}, preserve_contents)); + } + + TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { + return RemoveContext(Base::GetColorBufferSurface({}, index, preserve_contents)); + } + + TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + return RemoveContext(Base::GetFermiSurface({}, config)); + } + + std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { + return Base::TryFindFramebufferSurface(host_ptr); + } + + u64 Tick() { + return Base::Tick(); + } + +protected: + explicit TextureCacheContextless(Core::System& system, + VideoCore::RasterizerInterface& rasterizer) + : TextureCache{system, rasterizer} {} + + virtual TView* TryFastGetSurfaceView( + GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, + bool preserve_contents, const std::vector>& overlaps) = 0; + +private: + std::tuple TryFastGetSurfaceView( + DummyExecutionContext, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector>& overlaps) { + return {TryFastGetSurfaceView(gpu_addr, cpu_addr, host_ptr, params, preserve_contents, + overlaps), + {}}; + } + + TView* RemoveContext(std::tuple return_value) { + const auto [view, exctx] = return_value; + return view; + } +}; + +template +class SurfaceBaseContextless : public SurfaceBase { +public: + DummyExecutionContext 
DownloadTexture(DummyExecutionContext) { + DownloadTextureImpl(); + return {}; + } + + DummyExecutionContext UploadTexture(DummyExecutionContext) { + UploadTextureImpl(); + return {}; + } + +protected: + explicit SurfaceBaseContextless(TTextureCache& texture_cache, const SurfaceParams& params) + : SurfaceBase{texture_cache, params} {} + + virtual void DownloadTextureImpl() = 0; + + virtual void UploadTextureImpl() = 0; +}; + +} // namespace VideoCommon -- cgit v1.2.3 From 6c410104f4f6953ac37095aa5e65804bf115c026 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 25 Apr 2019 13:41:57 -0300 Subject: texture_cache: Remove execution context copies from the texture cache This is done to simplify the OpenGL implementation, it is needed for Vulkan. --- .../renderer_opengl/gl_texture_cache.cpp | 16 ++-- src/video_core/renderer_opengl/gl_texture_cache.h | 17 ++-- src/video_core/renderer_opengl/utils.cpp | 2 + src/video_core/texture_cache/surface_base.cpp | 8 +- src/video_core/texture_cache/surface_base.h | 8 +- src/video_core/texture_cache/texture_cache.h | 83 ++++++++----------- .../texture_cache/texture_cache_contextless.h | 93 ---------------------- 7 files changed, 59 insertions(+), 168 deletions(-) delete mode 100644 src/video_core/texture_cache/texture_cache_contextless.h (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ca007b797..f7c2f46aa 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,7 +9,7 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" -#include "video_core/texture_cache/texture_cache_contextless.h" +#include "video_core/texture_cache/texture_cache.h" #include "video_core/textures/convert.h" #include "video_core/textures/texture.h" @@ -18,6 
+18,10 @@ namespace OpenGL { using Tegra::Texture::SwizzleSource; using VideoCore::MortonSwizzleMode; +using VideoCore::Surface::ComponentType; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::SurfaceTarget; + namespace { struct FormatTuple { @@ -209,8 +213,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte } // Anonymous namespace CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params) - : VideoCommon::SurfaceBaseContextless{texture_cache, - params} { + : VideoCommon::SurfaceBase{texture_cache, params} { const auto& tuple{GetFormatTuple(params.GetPixelFormat(), params.GetComponentType())}; internal_format = tuple.internal_format; format = tuple.format; @@ -222,7 +225,7 @@ CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfacePar CachedSurface::~CachedSurface() = default; -void CachedSurface::DownloadTextureImpl() { +void CachedSurface::DownloadTexture() { // TODO(Rodrigo): Optimize alignment glPixelStorei(GL_PACK_ALIGNMENT, 1); SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); @@ -241,7 +244,7 @@ void CachedSurface::DownloadTextureImpl() { } } -void CachedSurface::UploadTextureImpl() { +void CachedSurface::UploadTexture() { SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.GetNumLevels(); ++level) { UploadTextureMipmap(level); @@ -321,7 +324,8 @@ void CachedSurface::UploadTextureMipmap(u32 level) { } void CachedSurface::DecorateSurfaceName() { - LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr()); + LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), + params.GetTarget() == SurfaceTarget::Texture3D ? 
"3D" : ""); } std::unique_ptr CachedSurface::CreateView(const ViewKey& view_key) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index b165187d9..c65e37153 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -14,32 +14,30 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/texture_cache/texture_cache_contextless.h" +#include "video_core/texture_cache/texture_cache.h" namespace OpenGL { using VideoCommon::SurfaceParams; using VideoCommon::ViewKey; -using VideoCore::Surface::ComponentType; -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; -using VideoCore::Surface::SurfaceType; class CachedSurfaceView; class CachedSurface; class TextureCacheOpenGL; using Surface = std::shared_ptr; -using TextureCacheBase = VideoCommon::TextureCacheContextless; +using TextureCacheBase = VideoCommon::TextureCache; -class CachedSurface final - : public VideoCommon::SurfaceBaseContextless { +class CachedSurface final : public VideoCommon::SurfaceBase { friend CachedSurfaceView; public: explicit CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params); ~CachedSurface(); + void UploadTexture(); + void DownloadTexture(); + GLenum GetTarget() const { return target; } @@ -53,9 +51,6 @@ protected: std::unique_ptr CreateView(const ViewKey& view_key); - void UploadTextureImpl(); - void DownloadTextureImpl(); - private: void UploadTextureMipmap(u32 level); diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index d9be61604..5994c0c61 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -18,7 +18,9 @@ namespace OpenGL { using Tegra::Shader::TextureType; using Tegra::Texture::SwizzleSource; + using 
VideoCore::Surface::SurfaceTarget; +using VideoCore::Surface::SurfaceType; BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 8680485b4..d0779b502 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -53,8 +53,8 @@ void SurfaceBaseImpl::LoadBuffer() { ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}", params.GetBlockWidth(), static_cast(params.GetTarget())); for (u32 level = 0; level < params.GetNumLevels(); ++level) { - u8* const buffer{GetStagingBufferLevelData(level)}; - SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, buffer, level); + SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, + GetStagingBufferLevelData(level), level); } } else { ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented"); @@ -89,8 +89,8 @@ void SurfaceBaseImpl::FlushBuffer() { ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}", params.GetBlockWidth()); for (u32 level = 0; level < params.GetNumLevels(); ++level) { - u8* const buffer = GetStagingBufferLevelData(level); - SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, buffer, level); + SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, + GetStagingBufferLevelData(level), level); } } else { UNIMPLEMENTED(); diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index d0142a9e6..eed8dc59d 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -89,14 +89,12 @@ private: std::vector staging_buffer; }; -template +template class SurfaceBase : public SurfaceBaseImpl { - static_assert(std::is_trivially_copyable_v); - public: - virtual TExecutionContext UploadTexture(TExecutionContext 
exctx) = 0; + virtual void UploadTexture() = 0; - virtual TExecutionContext DownloadTexture(TExecutionContext exctx) = 0; + virtual void DownloadTexture() = 0; TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) { if (view_addr < GetGpuAddr() || !params.IsFamiliar(view_params)) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fb43fa65e..c5c01957a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -41,11 +40,8 @@ class RasterizerInterface; namespace VideoCommon { -template +template class TextureCache { - static_assert(std::is_trivially_copyable_v); - - using ResultType = std::tuple; using IntervalMap = boost::icl::interval_map>>; using IntervalType = typename IntervalMap::interval_type; @@ -60,37 +56,35 @@ public: } } - ResultType GetTextureSurface(TExecutionContext exctx, - const Tegra::Texture::FullTextureInfo& config) { + TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { - return {{}, exctx}; + return {}; } const auto params{SurfaceParams::CreateForTexture(system, config)}; - return GetSurfaceView(exctx, gpu_addr, params, true); + return GetSurfaceView(gpu_addr, params, true); } - ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { + TView* GetDepthBufferSurface(bool preserve_contents) { const auto& regs{system.GPU().Maxwell3D().regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { - return {{}, exctx}; + return {}; } const auto depth_params{SurfaceParams::CreateForDepthBuffer( system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - return 
GetSurfaceView(exctx, gpu_addr, depth_params, preserve_contents); + return GetSurfaceView(gpu_addr, depth_params, preserve_contents); } - ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, - bool preserve_contents) { + TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); const auto& regs{system.GPU().Maxwell3D().regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - return {{}, exctx}; + return {}; } auto& memory_manager{system.GPU().MemoryManager()}; @@ -98,17 +92,16 @@ public: const auto gpu_addr{config.Address() + config.base_layer * config.layer_stride * sizeof(u32)}; if (!gpu_addr) { - return {{}, exctx}; + return {}; } - return GetSurfaceView(exctx, gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + return GetSurfaceView(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); } - ResultType GetFermiSurface(TExecutionContext exctx, - const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return GetSurfaceView(exctx, config.Address(), - SurfaceParams::CreateForFermiCopySurface(config), true); + TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + return GetSurfaceView(config.Address(), SurfaceParams::CreateForFermiCopySurface(config), + true); } std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { @@ -126,10 +119,9 @@ protected: ~TextureCache() = default; - virtual ResultType TryFastGetSurfaceView( - TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents, - const std::vector>& overlaps) = 0; + virtual TView* TryFastGetSurfaceView( + GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, + bool preserve_contents, const std::vector>& overlaps) = 0; virtual std::shared_ptr 
CreateSurface(const SurfaceParams& params) = 0; @@ -158,8 +150,7 @@ protected: Core::System& system; private: - ResultType GetSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, - const SurfaceParams& params, bool preserve_contents) { + TView* GetSurfaceView(GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { auto& memory_manager{system.GPU().MemoryManager()}; const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; DEBUG_ASSERT(cpu_addr); @@ -168,18 +159,17 @@ private: const auto cache_addr{ToCacheAddr(host_ptr)}; auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; if (overlaps.empty()) { - return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } if (overlaps.size() == 1) { if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { - return {view, exctx}; + return view; } } - TView* fast_view; - std::tie(fast_view, exctx) = TryFastGetSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, - params, preserve_contents, overlaps); + const auto fast_view{TryFastGetSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, + preserve_contents, overlaps)}; if (!fast_view) { std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { @@ -191,44 +181,39 @@ private: if (!fast_view) { // Flush even when we don't care about the contents, to preserve memory not // written by the new surface. 
- exctx = FlushSurface(exctx, surface); + FlushSurface(surface); } Unregister(surface); } - if (fast_view) { - return {fast_view, exctx}; + return fast_view; } - return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } - ResultType LoadSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr, const SurfaceParams& params, bool preserve_contents) { + TView* LoadSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents) { const auto new_surface{GetUncachedSurface(params)}; Register(new_surface, gpu_addr, cpu_addr, host_ptr); if (preserve_contents) { - exctx = LoadSurface(exctx, new_surface); + LoadSurface(new_surface); } - return {new_surface->GetView(gpu_addr, params), exctx}; + return new_surface->GetView(gpu_addr, params); } - TExecutionContext LoadSurface(TExecutionContext exctx, - const std::shared_ptr& surface) { + void LoadSurface(const std::shared_ptr& surface) { surface->LoadBuffer(); - exctx = surface->UploadTexture(exctx); + surface->UploadTexture(); surface->MarkAsModified(false); - return exctx; } - TExecutionContext FlushSurface(TExecutionContext exctx, - const std::shared_ptr& surface) { + void FlushSurface(const std::shared_ptr& surface) { if (!surface->IsModified()) { - return exctx; + return; } - exctx = surface->DownloadTexture(exctx); + surface->DownloadTexture(); surface->FlushBuffer(); - return exctx; } std::vector> GetSurfacesInRegion(CacheAddr cache_addr, diff --git a/src/video_core/texture_cache/texture_cache_contextless.h b/src/video_core/texture_cache/texture_cache_contextless.h deleted file mode 100644 index cd35a9fd4..000000000 --- a/src/video_core/texture_cache/texture_cache_contextless.h +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file 
included. - -#pragma once - -#include "video_core/texture_cache/texture_cache.h" - -namespace VideoCommon { - -struct DummyExecutionContext {}; - -template -class TextureCacheContextless : protected TextureCache { - using Base = TextureCache; - -public: - void InvalidateRegion(CacheAddr addr, std::size_t size) { - Base::InvalidateRegion(addr, size); - } - - TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { - return RemoveContext(Base::GetTextureSurface({}, config)); - } - - TView* GetDepthBufferSurface(bool preserve_contents) { - return RemoveContext(Base::GetDepthBufferSurface({}, preserve_contents)); - } - - TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { - return RemoveContext(Base::GetColorBufferSurface({}, index, preserve_contents)); - } - - TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return RemoveContext(Base::GetFermiSurface({}, config)); - } - - std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { - return Base::TryFindFramebufferSurface(host_ptr); - } - - u64 Tick() { - return Base::Tick(); - } - -protected: - explicit TextureCacheContextless(Core::System& system, - VideoCore::RasterizerInterface& rasterizer) - : TextureCache{system, rasterizer} {} - - virtual TView* TryFastGetSurfaceView( - GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, const std::vector>& overlaps) = 0; - -private: - std::tuple TryFastGetSurfaceView( - DummyExecutionContext, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents, - const std::vector>& overlaps) { - return {TryFastGetSurfaceView(gpu_addr, cpu_addr, host_ptr, params, preserve_contents, - overlaps), - {}}; - } - - TView* RemoveContext(std::tuple return_value) { - const auto [view, exctx] = return_value; - return view; - } -}; - -template -class SurfaceBaseContextless : public SurfaceBase { -public: - 
DummyExecutionContext DownloadTexture(DummyExecutionContext) { - DownloadTextureImpl(); - return {}; - } - - DummyExecutionContext UploadTexture(DummyExecutionContext) { - UploadTextureImpl(); - return {}; - } - -protected: - explicit SurfaceBaseContextless(TTextureCache& texture_cache, const SurfaceParams& params) - : SurfaceBase{texture_cache, params} {} - - virtual void DownloadTextureImpl() = 0; - - virtual void UploadTextureImpl() = 0; -}; - -} // namespace VideoCommon -- cgit v1.2.3 From 3d471e732d688c20aef73a506bdb6126002d3193 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 10:56:45 -0400 Subject: Correct Surface Base and Views for new Texture Cache --- src/video_core/texture_cache/copy_params.h | 25 ++ src/video_core/texture_cache/surface_base.cpp | 108 +++++---- src/video_core/texture_cache/surface_base.h | 297 ++++++++++++++++++------ src/video_core/texture_cache/surface_params.cpp | 210 ++++++----------- src/video_core/texture_cache/surface_params.h | 159 ++++--------- src/video_core/texture_cache/surface_view.cpp | 12 +- src/video_core/texture_cache/surface_view.h | 35 ++- 7 files changed, 466 insertions(+), 380 deletions(-) create mode 100644 src/video_core/texture_cache/copy_params.h (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h new file mode 100644 index 000000000..75c2b1f05 --- /dev/null +++ b/src/video_core/texture_cache/copy_params.h @@ -0,0 +1,25 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_types.h" + +namespace VideoCommon { + +struct CopyParams { + u32 source_x; + u32 source_y; + u32 source_z; + u32 dest_x; + u32 dest_y; + u32 dest_z; + u32 source_level; + u32 dest_level; + u32 width; + u32 height; + u32 depth; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index d0779b502..5273fcb44 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -4,104 +4,120 @@ #include "common/assert.h" #include "common/common_types.h" -#include "video_core/morton.h" +#include "common/microprofile.h" +#include "video_core/memory_manager.h" #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/textures/convert.h" namespace VideoCommon { +MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); + using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; -namespace { -void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, - u32 level) { +SurfaceBaseImpl::SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& params) + : gpu_addr{gpu_vaddr}, params{params}, mipmap_sizes{params.num_levels}, + mipmap_offsets{params.num_levels}, layer_size{params.GetGuestLayerSize()}, + memory_size{params.GetGuestSizeInBytes()}, host_memory_size{params.GetHostSizeInBytes()} { + u32 offset = 0; + mipmap_offsets.resize(params.num_levels); + mipmap_sizes.resize(params.num_levels); + gpu_addr_end = gpu_addr + memory_size; + for (u32 i = 0; i < params.num_levels; i++) { + mipmap_offsets[i] = offset; + mipmap_sizes[i] = params.GetGuestMipmapSize(i); + offset += mipmap_sizes[i]; + } +} + +void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, 
const SurfaceParams& params, + u8* buffer, u32 level) { const u32 width{params.GetMipWidth(level)}; const u32 height{params.GetMipHeight(level)}; const u32 block_height{params.GetMipBlockHeight(level)}; const u32 block_depth{params.GetMipBlockDepth(level)}; - std::size_t guest_offset{params.GetGuestMipmapLevelOffset(level)}; - if (params.IsLayered()) { + std::size_t guest_offset{mipmap_offsets[level]}; + if (params.is_layered) { std::size_t host_offset{0}; - const std::size_t guest_stride = params.GetGuestLayerSize(); + const std::size_t guest_stride = layer_size; const std::size_t host_stride = params.GetHostLayerSize(level); - for (u32 layer = 0; layer < params.GetNumLayers(); layer++) { - MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, - 1, params.GetTileWidthSpacing(), buffer + host_offset, - memory + guest_offset); + for (u32 layer = 0; layer < params.depth; layer++) { + MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1, + params.tile_width_spacing, buffer + host_offset, memory + guest_offset); guest_offset += guest_stride; host_offset += host_stride; } } else { - MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, - params.GetMipDepth(level), params.GetTileWidthSpacing(), buffer, + MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, + params.GetMipDepth(level), params.tile_width_spacing, buffer, memory + guest_offset); } } -} // Anonymous namespace -SurfaceBaseImpl::SurfaceBaseImpl(const SurfaceParams& params) : params{params} { - staging_buffer.resize(params.GetHostSizeInBytes()); -} - -SurfaceBaseImpl::~SurfaceBaseImpl() = default; - -void SurfaceBaseImpl::LoadBuffer() { - if (params.IsTiled()) { - ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}", - params.GetBlockWidth(), static_cast(params.GetTarget())); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { +void 
SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, + std::vector& staging_buffer) { + MICROPROFILE_SCOPE(GPU_Load_Texture); + auto host_ptr = memory_manager.GetPointer(gpu_addr); + if (params.is_tiled) { + ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", + params.block_width, static_cast(params.target)); + for (u32 level = 0; level < params.num_levels; ++level) { + const u32 host_offset = params.GetHostMipmapLevelOffset(level); SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, - GetStagingBufferLevelData(level), level); + staging_buffer.data() + host_offset, level); } } else { - ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented"); - const u32 bpp{GetFormatBpp(params.GetPixelFormat()) / CHAR_BIT}; + ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented"); + const u32 bpp{params.GetBytesPerPixel()}; const u32 block_width{params.GetDefaultBlockWidth()}; const u32 block_height{params.GetDefaultBlockHeight()}; - const u32 width{(params.GetWidth() + block_width - 1) / block_width}; - const u32 height{(params.GetHeight() + block_height - 1) / block_height}; + const u32 width{(params.width + block_width - 1) / block_width}; + const u32 height{(params.height + block_height - 1) / block_height}; const u32 copy_size{width * bpp}; - if (params.GetPitch() == copy_size) { + if (params.pitch == copy_size) { std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); } else { const u8* start{host_ptr}; u8* write_to{staging_buffer.data()}; for (u32 h = height; h > 0; --h) { std::memcpy(write_to, start, copy_size); - start += params.GetPitch(); + start += params.pitch; write_to += copy_size; } } } - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - ConvertFromGuestToHost(GetStagingBufferLevelData(level), params.GetPixelFormat(), + for (u32 level = 0; level < params.num_levels; ++level) { + const u32 host_offset = 
params.GetHostMipmapLevelOffset(level); + ConvertFromGuestToHost(staging_buffer.data() + host_offset, params.pixel_format, params.GetMipWidth(level), params.GetMipHeight(level), params.GetMipDepth(level), true, true); } } -void SurfaceBaseImpl::FlushBuffer() { - if (params.IsTiled()) { - ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}", - params.GetBlockWidth()); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, - GetStagingBufferLevelData(level), level); +void SurfaceBaseImpl::FlushBuffer(std::vector& staging_buffer) { + MICROPROFILE_SCOPE(GPU_Flush_Texture); + if (params.is_tiled) { + ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); + for (u32 level = 0; level < params.num_levels; ++level) { + const u32 host_offset = params.GetHostMipmapLevelOffset(level); + SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, + staging_buffer.data() + host_offset, level); } } else { UNIMPLEMENTED(); /* - ASSERT(params.GetTarget() == SurfaceTarget::Texture2D); - ASSERT(params.GetNumLevels() == 1); + ASSERT(params.target == SurfaceTarget::Texture2D); + ASSERT(params.num_levels == 1); const u32 bpp{params.GetFormatBpp() / 8}; - const u32 copy_size{params.GetWidth() * bpp}; - if (params.GetPitch() == copy_size) { - std::memcpy(host_ptr, staging_buffer.data(), GetSizeInBytes()); + const u32 copy_size{params.width * bpp}; + if (params.pitch == copy_size) { + std::memcpy(host_ptr, staging_buffer.data(), memory_size); } else { u8* start{host_ptr}; const u8* read_to{staging_buffer.data()}; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index eed8dc59d..5fd7add0a 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -4,166 +4,309 @@ #pragma once +#include #include +#include #include "common/assert.h" #include 
"common/common_types.h" #include "video_core/gpu.h" +#include "video_core/morton.h" +#include "video_core/texture_cache/copy_params.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" +template> +ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp={}) +{ + // Note: BOTH type T and the type after ForwardIt is dereferenced + // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. + // This is stricter than lower_bound requirement (see above) + + first = std::lower_bound(first, last, value, comp); + return first != last && !comp(value, *first) ? first : last; +} + +namespace Tegra { +class MemoryManager; +} + namespace VideoCommon { +using VideoCore::Surface::SurfaceTarget; +using VideoCore::MortonSwizzleMode; + class SurfaceBaseImpl { public: - void LoadBuffer(); + void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); - void FlushBuffer(); + void FlushBuffer(std::vector& staging_buffer); GPUVAddr GetGpuAddr() const { - ASSERT(is_registered); return gpu_addr; } + GPUVAddr GetGpuAddrEnd() const { + return gpu_addr_end; + } + + bool Overlaps(const GPUVAddr start, const GPUVAddr end) const { + return (gpu_addr < end) && (gpu_addr_end > start); + } + + // Use only when recycling a surface + void SetGpuAddr(const GPUVAddr new_addr) { + gpu_addr = new_addr; + gpu_addr_end = new_addr + memory_size; + } + VAddr GetCpuAddr() const { - ASSERT(is_registered); - return cpu_addr; + return gpu_addr; + } + + void SetCpuAddr(const VAddr new_addr) { + cpu_addr = new_addr; } u8* GetHostPtr() const { - ASSERT(is_registered); return host_ptr; } - CacheAddr GetCacheAddr() const { - ASSERT(is_registered); - return cache_addr; + void SetHostPtr(u8* new_addr) { + host_ptr = new_addr; } const SurfaceParams& GetSurfaceParams() const { return params; } - void Register(GPUVAddr gpu_addr_, VAddr cpu_addr_, u8* host_ptr_) { - ASSERT(!is_registered); - 
is_registered = true; - gpu_addr = gpu_addr_; - cpu_addr = cpu_addr_; - host_ptr = host_ptr_; - cache_addr = ToCacheAddr(host_ptr_); - DecorateSurfaceName(); + std::size_t GetSizeInBytes() const { + return memory_size; } - void Unregister() { - ASSERT(is_registered); - is_registered = false; + std::size_t GetHostSizeInBytes() const { + return host_memory_size; } - bool IsRegistered() const { - return is_registered; + std::size_t GetMipmapSize(const u32 level) const { + return mipmap_sizes[level]; } - std::size_t GetSizeInBytes() const { - return params.GetGuestSizeInBytes(); + bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { + return params.pixel_format == pixel_format; + } + + bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { + return params.target == target; + } + + bool MatchesTopology(const SurfaceParams& rhs) const { + const u32 src_bpp = params.GetBytesPerPixel(); + const u32 dst_bpp = rhs.GetBytesPerPixel(); + return std::tie(src_bpp, params.is_tiled) == std::tie(dst_bpp, rhs.is_tiled); + } + + bool MatchesStructure(const SurfaceParams& rhs) const { + if (params.is_tiled) { + const u32 a_width1 = params.GetBlockAlignedWidth(); + const u32 a_width2 = rhs.GetBlockAlignedWidth(); + return std::tie(a_width1, params.height, params.depth, params.block_width, + params.block_height, params.block_depth, params.tile_width_spacing) == + std::tie(a_width2, rhs.height, rhs.depth, rhs.block_width, rhs.block_height, + rhs.block_depth, rhs.tile_width_spacing); + } else { + return std::tie(params.width, params.height, params.pitch) == + std::tie(rhs.width, rhs.height, rhs.pitch); + } + } + + std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const { + if (candidate_gpu_addr < gpu_addr) + return {}; + const GPUVAddr relative_address = candidate_gpu_addr - gpu_addr; + const u32 layer = relative_address / layer_size; + const GPUVAddr mipmap_address = relative_address - layer_size * layer; + const auto mipmap_it = 
binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + if (mipmap_it != mipmap_offsets.end()) { + return {{layer, std::distance(mipmap_offsets.begin(), mipmap_it)}}; + } + return {}; } - u8* GetStagingBufferLevelData(u32 level) { - return staging_buffer.data() + params.GetHostMipmapLevelOffset(level); + std::vector BreakDown() const { + auto set_up_copy = [](CopyParams& cp, const SurfaceParams& params, const u32 depth, + const u32 level) { + cp.source_x = 0; + cp.source_y = 0; + cp.source_z = 0; + cp.dest_x = 0; + cp.dest_y = 0; + cp.dest_z = 0; + cp.source_level = level; + cp.dest_level = level; + cp.width = params.GetMipWidth(level); + cp.height = params.GetMipHeight(level); + cp.depth = depth; + }; + const u32 layers = params.depth; + const u32 mipmaps = params.num_levels; + if (params.is_layered) { + std::vector result{layers * mipmaps}; + for (std::size_t layer = 0; layer < layers; layer++) { + const u32 layer_offset = layer * mipmaps; + for (std::size_t level = 0; level < mipmaps; level++) { + CopyParams& cp = result[layer_offset + level]; + set_up_copy(cp, params, layer, level); + } + } + return result; + } else { + std::vector result{mipmaps}; + for (std::size_t level = 0; level < mipmaps; level++) { + CopyParams& cp = result[level]; + set_up_copy(cp, params, params.GetMipDepth(level), level); + } + return result; + } } protected: - explicit SurfaceBaseImpl(const SurfaceParams& params); - ~SurfaceBaseImpl(); // non-virtual is intended + explicit SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& params); + ~SurfaceBaseImpl() = default; virtual void DecorateSurfaceName() = 0; const SurfaceParams params; - -private: GPUVAddr gpu_addr{}; - VAddr cpu_addr{}; - u8* host_ptr{}; - CacheAddr cache_addr{}; - bool is_registered{}; + GPUVAddr gpu_addr_end{}; + std::vector mipmap_sizes; + std::vector mipmap_offsets; + const std::size_t layer_size; + const std::size_t memory_size; + const std::size_t host_memory_size; + u8* host_ptr; + 
VAddr cpu_addr; - std::vector staging_buffer; +private: + void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, + u32 level); }; -template +template class SurfaceBase : public SurfaceBaseImpl { public: - virtual void UploadTexture() = 0; + virtual void UploadTexture(std::vector& staging_buffer) = 0; - virtual void DownloadTexture() = 0; + virtual void DownloadTexture(std::vector& staging_buffer) = 0; - TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) { - if (view_addr < GetGpuAddr() || !params.IsFamiliar(view_params)) { - // It can't be a view if it's in a prior address. - return {}; - } + void MarkAsModified(const bool is_modified_, const u64 tick) { + is_modified = is_modified_ || is_protected; + modification_tick = tick; + } - const auto relative_offset{static_cast(view_addr - GetGpuAddr())}; - const auto it{view_offset_map.find(relative_offset)}; - if (it == view_offset_map.end()) { - // Couldn't find an aligned view. - return {}; - } - const auto [layer, level] = it->second; + void MarkAsProtected(const bool is_protected) { + this->is_protected = is_protected; + } - if (!params.IsViewValid(view_params, layer, level)) { - return {}; - } + void MarkAsPicked(const bool is_picked) { + this->is_picked = is_picked; + } - return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels()); + bool IsModified() const { + return is_modified; } - void MarkAsModified(bool is_modified_) { - is_modified = is_modified_; - if (is_modified_) { - modification_tick = texture_cache.Tick(); - } + bool IsProtected() const { + return is_protected; } - TView* GetView(GPUVAddr view_addr, const SurfaceParams& view_params) { - TView* view{TryGetView(view_addr, view_params)}; - ASSERT(view != nullptr); - return view; + bool IsRegistered() const { + return is_registered; } - bool IsModified() const { - return is_modified; + bool IsPicked() const { + return is_picked; + } + + void MarkAsRegistered(bool is_reg) 
{ + is_registered = is_reg; } u64 GetModificationTick() const { return modification_tick; } + TView EmplaceOverview(const SurfaceParams& overview_params) { + ViewParams vp{}; + vp.base_level = 0; + vp.num_levels = params.num_levels; + vp.target = overview_params.target; + if (params.is_layered && !overview_params.is_layered) { + vp.base_layer = 0; + vp.num_layers = 1; + } else { + vp.base_layer = 0; + vp.num_layers = params.depth; + } + return GetView(vp); + } + + std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { + if (view_addr < gpu_addr) + return {}; + if (params.target == SurfaceTarget::Texture3D || view_params.target == SurfaceTarget::Texture3D) { + return {}; + } + const std::size_t size = view_params.GetGuestSizeInBytes(); + const GPUVAddr relative_address = view_addr - gpu_addr; + auto layer_mipmap = GetLayerMipmap(relative_address); + if (!layer_mipmap) { + return {}; + } + const u32 layer = (*layer_mipmap).first; + const u32 mipmap = (*layer_mipmap).second; + if (GetMipmapSize(mipmap) != size) { + // TODO: the view may cover many mimaps, this case can still go on + return {}; + } + ViewParams vp{}; + vp.base_layer = layer; + vp.num_layers = 1; + vp.base_level = mipmap; + vp.num_levels = 1; + vp.target = params.target; + return {GetView(vp)}; + } + + TView GetMainView() const { + return main_view; + } + protected: - explicit SurfaceBase(TTextureCache& texture_cache, const SurfaceParams& params) - : SurfaceBaseImpl{params}, texture_cache{texture_cache}, - view_offset_map{params.CreateViewOffsetMap()} {} + explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params) + : SurfaceBaseImpl(gpu_addr, params) {} ~SurfaceBase() = default; - virtual std::unique_ptr CreateView(const ViewKey& view_key) = 0; + virtual TView CreateView(const ViewParams& view_key) = 0; + + std::unordered_map views; + TView main_view; private: - TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) { - const 
ViewKey key{base_layer, num_layers, base_level, num_levels}; + TView GetView(const ViewParams& key) { const auto [entry, is_cache_miss] = views.try_emplace(key); auto& view{entry->second}; if (is_cache_miss) { view = CreateView(key); } - return view.get(); + return view; } - TTextureCache& texture_cache; - const std::map> view_offset_map; - - std::unordered_map> views; - bool is_modified{}; + bool is_protected{}; + bool is_registered{}; + bool is_picked{}; u64 modification_tick{}; }; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index d1f8c53d5..d9052152c 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -7,6 +7,7 @@ #include "common/cityhash.h" #include "common/alignment.h" #include "core/core.h" +#include "video_core/engines/shader_bytecode.h" #include "video_core/surface.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/textures/decoders.h" @@ -22,6 +23,37 @@ using VideoCore::Surface::PixelFormatFromTextureFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceTargetFromTextureType; +SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { + switch (type) { + case Tegra::Shader::TextureType::Texture1D: { + if (is_array) + return SurfaceTarget::Texture1DArray; + else + return SurfaceTarget::Texture1D; + } + case Tegra::Shader::TextureType::Texture2D: { + if (is_array) + return SurfaceTarget::Texture2DArray; + else + return SurfaceTarget::Texture2D; + } + case Tegra::Shader::TextureType::Texture3D: { + ASSERT(!is_array); + return SurfaceTarget::Texture3D; + } + case Tegra::Shader::TextureType::TextureCube: { + if (is_array) + return SurfaceTarget::TextureCubeArray; + else + return SurfaceTarget::TextureCubemap; + } + default: { + UNREACHABLE(); + return SurfaceTarget::Texture2D; + } + } +} + namespace { constexpr u32 GetMipmapSize(bool 
uncompressed, u32 mip_size, u32 tile) { return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); @@ -29,7 +61,8 @@ constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { } // Anonymous namespace SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, - const Tegra::Texture::FullTextureInfo& config) { + const Tegra::Texture::FullTextureInfo& config, + const VideoCommon::Shader::Sampler& entry) { SurfaceParams params; params.is_tiled = config.tic.IsTiled(); params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); @@ -41,7 +74,8 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.srgb_conversion); params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); - params.target = SurfaceTargetFromTextureType(config.tic.texture_type); + // TODO: on 1DBuffer we should use the tic info. + params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); params.depth = config.tic.Depth(); @@ -52,8 +86,7 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.pitch = params.is_tiled ? 
0 : config.tic.Pitch(); params.unaligned_height = config.tic.Height(); params.num_levels = config.tic.max_mip_level + 1; - - params.CalculateCachedValues(); + params.is_layered = params.IsLayered(); return params; } @@ -77,8 +110,7 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; - - params.CalculateCachedValues(); + params.is_layered = false; return params; } @@ -108,8 +140,7 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; params.num_levels = 1; - - params.CalculateCachedValues(); + params.is_layered = false; return params; } @@ -128,13 +159,13 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.type = GetFormatType(params.pixel_format); params.width = config.width; params.height = config.height; + params.pitch = config.pitch; params.unaligned_height = config.height; // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; - - params.CalculateCachedValues(); + params.is_layered = params.IsLayered(); return params; } @@ -147,7 +178,7 @@ u32 SurfaceParams::GetMipHeight(u32 level) const { } u32 SurfaceParams::GetMipDepth(u32 level) const { - return IsLayered() ? depth : std::max(1U, depth >> level); + return is_layered ? 
depth : std::max(1U, depth >> level); } bool SurfaceParams::IsLayered() const { @@ -183,7 +214,7 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { if (level == 0) { return this->block_depth; } - if (IsLayered()) { + if (is_layered) { return 1; } @@ -216,6 +247,10 @@ std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { return offset; } +std::size_t SurfaceParams::GetGuestMipmapSize(u32 level) const { + return GetInnerMipmapMemorySize(level, false, false); +} + std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); } @@ -229,7 +264,7 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co for (u32 level = 0; level < num_levels; ++level) { size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); } - if (is_tiled && (IsLayered() || target == SurfaceTarget::Texture3D)) { + if (is_tiled && is_layered) { return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); } return size; @@ -256,150 +291,32 @@ u32 SurfaceParams::GetBytesPerPixel() const { return VideoCore::Surface::GetBytesPerPixel(pixel_format); } -bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const { - if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) != - std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format, - view_params.component_type, view_params.type)) { - return false; - } - - const SurfaceTarget view_target{view_params.target}; - if (view_target == target) { - return true; - } - - switch (target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture3D: - return false; - case SurfaceTarget::Texture1DArray: - return view_target == SurfaceTarget::Texture1D; - case SurfaceTarget::Texture2DArray: - return view_target == SurfaceTarget::Texture2D; - case SurfaceTarget::TextureCubemap: - return view_target == 
SurfaceTarget::Texture2D || - view_target == SurfaceTarget::Texture2DArray; - case SurfaceTarget::TextureCubeArray: - return view_target == SurfaceTarget::Texture2D || - view_target == SurfaceTarget::Texture2DArray || - view_target == SurfaceTarget::TextureCubemap; - default: - UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast(target)); - return false; - } -} - bool SurfaceParams::IsPixelFormatZeta() const { return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; } -void SurfaceParams::CalculateCachedValues() { - switch (target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture3D: - num_layers = 1; - break; - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - num_layers = depth; - break; - default: - UNREACHABLE(); - } - - guest_size_in_bytes = GetInnerMemorySize(false, false, false); - - if (IsPixelFormatASTC(pixel_format)) { - // ASTC is uncompressed in software, in emulated as RGBA8 - host_size_in_bytes = static_cast(width) * static_cast(height) * - static_cast(depth) * 4ULL; - } else { - host_size_in_bytes = GetInnerMemorySize(true, false, false); - } -} - std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const { const bool tiled{as_host_size ? false : is_tiled}; const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; - const u32 depth{target == SurfaceTarget::Texture3D ? GetMipDepth(level) : 1U}; + const u32 depth{is_layered ? 
1U : GetMipDepth(level)}; return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, GetMipBlockHeight(level), GetMipBlockDepth(level)); } std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { - return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : num_layers); -} - -std::map> SurfaceParams::CreateViewOffsetMap() const { - std::map> view_offset_map; - switch (target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture3D: { - // TODO(Rodrigo): Add layer iterations for 3D textures - constexpr u32 layer = 0; - for (u32 level = 0; level < num_levels; ++level) { - const std::size_t offset{GetGuestMipmapLevelOffset(level)}; - view_offset_map.insert({offset, {layer, level}}); - } - break; - } - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: { - const std::size_t layer_size{GetGuestLayerSize()}; - for (u32 level = 0; level < num_levels; ++level) { - const std::size_t level_offset{GetGuestMipmapLevelOffset(level)}; - for (u32 layer = 0; layer < num_layers; ++layer) { - const auto layer_offset{static_cast(layer_size * layer)}; - const std::size_t offset{level_offset + layer_offset}; - view_offset_map.insert({offset, {layer, level}}); - } - } - break; - } - default: - UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast(target)); - } - return view_offset_map; -} - -bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const { - return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) && - IsInBounds(view_params, layer, level); + return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 
1U : depth); } -bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const { - return view_params.width == GetMipWidth(level) && view_params.height == GetMipHeight(level); -} - -bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const { - if (view_params.target != SurfaceTarget::Texture3D) { - return true; - } - return view_params.depth == GetMipDepth(level); -} - -bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const { - return layer + view_params.num_layers <= num_layers && - level + view_params.num_levels <= num_levels; -} - -std::size_t HasheableSurfaceParams::Hash() const { +std::size_t SurfaceParams::Hash() const { return static_cast( Common::CityHash64(reinterpret_cast(this), sizeof(*this))); } -bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const { +bool SurfaceParams::operator==(const SurfaceParams& rhs) const { return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, height, depth, pitch, unaligned_height, num_levels, pixel_format, component_type, type, target) == @@ -409,4 +326,27 @@ bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const rhs.type, rhs.target); } +std::string SurfaceParams::TargetName() const { + switch (target) { + case SurfaceTarget::Texture1D: + return "1D"; + case SurfaceTarget::Texture2D: + return "2D"; + case SurfaceTarget::Texture3D: + return "3D"; + case SurfaceTarget::Texture1DArray: + return "1DArray"; + case SurfaceTarget::Texture2DArray: + return "2DArray"; + case SurfaceTarget::TextureCubemap: + return "Cube"; + case SurfaceTarget::TextureCubeArray: + return "CubeArray"; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast(target)); + UNREACHABLE(); + return fmt::format("TUK({})", static_cast(target)); + } +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.h 
b/src/video_core/texture_cache/surface_params.h index 77dc0ba66..ec8efa210 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -6,50 +6,21 @@ #include +#include "common/alignment.h" #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" +#include "video_core/shader/shader_ir.h" namespace VideoCommon { -class HasheableSurfaceParams { -public: - std::size_t Hash() const; - - bool operator==(const HasheableSurfaceParams& rhs) const; - - bool operator!=(const HasheableSurfaceParams& rhs) const { - return !operator==(rhs); - } - -protected: - // Avoid creation outside of a managed environment. - HasheableSurfaceParams() = default; - - bool is_tiled; - bool srgb_conversion; - u32 block_width; - u32 block_height; - u32 block_depth; - u32 tile_width_spacing; - u32 width; - u32 height; - u32 depth; - u32 pitch; - u32 unaligned_height; - u32 num_levels; - VideoCore::Surface::PixelFormat pixel_format; - VideoCore::Surface::ComponentType component_type; - VideoCore::Surface::SurfaceType type; - VideoCore::Surface::SurfaceTarget target; -}; - -class SurfaceParams final : public HasheableSurfaceParams { +class SurfaceParams { public: /// Creates SurfaceCachedParams from a texture configuration. static SurfaceParams CreateForTexture(Core::System& system, - const Tegra::Texture::FullTextureInfo& config); + const Tegra::Texture::FullTextureInfo& config, + const VideoCommon::Shader::Sampler& entry); /// Creates SurfaceCachedParams for a depth buffer configuration. 
static SurfaceParams CreateForDepthBuffer( @@ -64,68 +35,33 @@ public: static SurfaceParams CreateForFermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& config); - bool IsTiled() const { - return is_tiled; - } - - bool GetSrgbConversion() const { - return srgb_conversion; - } - - u32 GetBlockWidth() const { - return block_width; - } - - u32 GetTileWidthSpacing() const { - return tile_width_spacing; - } - - u32 GetWidth() const { - return width; - } - - u32 GetHeight() const { - return height; - } - - u32 GetDepth() const { - return depth; - } - - u32 GetPitch() const { - return pitch; - } - - u32 GetNumLevels() const { - return num_levels; - } - - VideoCore::Surface::PixelFormat GetPixelFormat() const { - return pixel_format; - } - - VideoCore::Surface::ComponentType GetComponentType() const { - return component_type; - } + std::size_t Hash() const; - VideoCore::Surface::SurfaceTarget GetTarget() const { - return target; - } + bool operator==(const SurfaceParams& rhs) const; - VideoCore::Surface::SurfaceType GetType() const { - return type; + bool operator!=(const SurfaceParams& rhs) const { + return !operator==(rhs); } std::size_t GetGuestSizeInBytes() const { - return guest_size_in_bytes; + return GetInnerMemorySize(false, false, false); } std::size_t GetHostSizeInBytes() const { + std::size_t host_size_in_bytes; + if (IsPixelFormatASTC(pixel_format)) { + // ASTC is uncompressed in software, in emulated as RGBA8 + host_size_in_bytes = static_cast(Common::AlignUp(width, GetDefaultBlockWidth())) * + static_cast(Common::AlignUp(height, GetDefaultBlockHeight())) * + static_cast(depth) * 4ULL; + } else { + host_size_in_bytes = GetInnerMemorySize(true, false, false); + } return host_size_in_bytes; } - u32 GetNumLayers() const { - return num_layers; + u32 GetBlockAlignedWidth() const { + return Common::AlignUp(width, 64 / GetBytesPerPixel()); } /// Returns the width of a given mipmap level. 
@@ -137,9 +73,6 @@ public: /// Returns the depth of a given mipmap level. u32 GetMipDepth(u32 level) const; - /// Returns true if these parameters are from a layered surface. - bool IsLayered() const; - /// Returns the block height of a given mipmap level. u32 GetMipBlockHeight(u32 level) const; @@ -152,6 +85,9 @@ public: /// Returns the offset in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapLevelOffset(u32 level) const; + /// Returns the size in bytes in guest memory of a given mipmap level. + std::size_t GetGuestMipmapSize(u32 level) const; + /// Returns the size in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapSize(u32 level) const; @@ -173,24 +109,30 @@ public: /// Returns the bytes per pixel. u32 GetBytesPerPixel() const; - /// Returns true if another surface can be familiar with this. This is a loosely defined term - /// that reflects the possibility of these two surface parameters potentially being part of a - /// bigger superset. - bool IsFamiliar(const SurfaceParams& view_params) const; - /// Returns true if the pixel format is a depth and/or stencil format. bool IsPixelFormatZeta() const; - /// Creates a map that redirects an address difference to a layer and mipmap level. - std::map> CreateViewOffsetMap() const; + std::string TargetName() const; - /// Returns true if the passed surface view parameters is equal or a valid subset of this. 
- bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const; + bool is_tiled; + bool srgb_conversion; + bool is_layered; + u32 block_width; + u32 block_height; + u32 block_depth; + u32 tile_width_spacing; + u32 width; + u32 height; + u32 depth; + u32 pitch; + u32 unaligned_height; + u32 num_levels; + VideoCore::Surface::PixelFormat pixel_format; + VideoCore::Surface::ComponentType component_type; + VideoCore::Surface::SurfaceType type; + VideoCore::Surface::SurfaceTarget target; private: - /// Calculates values that can be deduced from HasheableSurfaceParams. - void CalculateCachedValues(); - /// Returns the size of a given mipmap level inside a layer. std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; @@ -200,19 +142,12 @@ private: /// Returns the size of a layer std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; - /// Returns true if the passed view width and height match the size of this params in a given - /// mipmap level. - bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const; - - /// Returns true if the passed view depth match the size of this params in a given mipmap level. - bool IsDepthValid(const SurfaceParams& view_params, u32 level) const; - - /// Returns true if the passed view layers and mipmap levels are in bounds. - bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const; + std::size_t GetNumLayers() const { + return is_layered ? depth : 1; + } - std::size_t guest_size_in_bytes; - std::size_t host_size_in_bytes; - u32 num_layers; + /// Returns true if these parameters are from a layered surface. 
+ bool IsLayered() const; }; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp index 5f4cdbb1c..467696a4c 100644 --- a/src/video_core/texture_cache/surface_view.cpp +++ b/src/video_core/texture_cache/surface_view.cpp @@ -9,15 +9,15 @@ namespace VideoCommon { -std::size_t ViewKey::Hash() const { +std::size_t ViewParams::Hash() const { return static_cast(base_layer) ^ static_cast(num_layers << 16) ^ - (static_cast(base_level) << 32) ^ - (static_cast(num_levels) << 48); + (static_cast(base_level) << 24) ^ + (static_cast(num_levels) << 32) ^ (static_cast(target) << 36); } -bool ViewKey::operator==(const ViewKey& rhs) const { - return std::tie(base_layer, num_layers, base_level, num_levels) == - std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels); +bool ViewParams::operator==(const ViewParams& rhs) const { + return std::tie(base_layer, num_layers, base_level, num_levels, target) == + std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index e73d8f6ae..c122800a6 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -7,18 +7,45 @@ #include #include "common/common_types.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/surface_params.h" namespace VideoCommon { -struct ViewKey { +struct ViewParams { std::size_t Hash() const; - bool operator==(const ViewKey& rhs) const; + bool operator==(const ViewParams& rhs) const; u32 base_layer{}; u32 num_layers{}; u32 base_level{}; u32 num_levels{}; + VideoCore::Surface::SurfaceTarget target; + bool IsLayered() const { + switch (target) { + case VideoCore::Surface::SurfaceTarget::Texture1DArray: + case VideoCore::Surface::SurfaceTarget::Texture2DArray: + case 
VideoCore::Surface::SurfaceTarget::TextureCubemap: + case VideoCore::Surface::SurfaceTarget::TextureCubeArray: + return true; + default: + return false; + } + } +}; + +class ViewBase { +public: + ViewBase(const ViewParams& params) : params{params} {} + ~ViewBase() = default; + + const ViewParams& GetViewParams() const { + return params; + } + +protected: + ViewParams params; }; } // namespace VideoCommon @@ -26,8 +53,8 @@ struct ViewKey { namespace std { template <> -struct hash { - std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept { +struct hash { + std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept { return k.Hash(); } }; -- cgit v1.2.3 From bc930754cc9437ddd86e7d246b3eb4302540896a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 10:57:16 -0400 Subject: Implement Texture Cache V2 --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 69 +++- .../renderer_opengl/gl_texture_cache.cpp | 286 +++++-------- src/video_core/renderer_opengl/gl_texture_cache.h | 111 ++--- src/video_core/renderer_opengl/utils.cpp | 23 +- src/video_core/renderer_opengl/utils.h | 6 +- src/video_core/texture_cache/texture_cache.h | 454 +++++++++++++++++---- 6 files changed, 568 insertions(+), 381 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 027e9d293..482d0428c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -83,10 +83,10 @@ struct FramebufferCacheKey { bool stencil_enable = false; std::array color_attachments{}; - std::array colors{}; + std::array colors{}; u32 colors_count = 0; - CachedSurfaceView* zeta = nullptr; + View zeta = nullptr; auto Tie() const { return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count, @@ -115,6 +115,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, 
Core::Frontend::EmuWind RasterizerOpenGL::~RasterizerOpenGL() {} +void RasterizerOpenGL::InitMemoryMananger(Tegra::MemoryManager& memory_manager) { + texture_cache.InitMemoryMananger(memory_manager); +} + void RasterizerOpenGL::CheckExtensions() { if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { LOG_WARNING( @@ -474,9 +478,11 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( } current_framebuffer_config_state = fb_config_state; - CachedSurfaceView* depth_surface{}; + View depth_surface{}; if (using_depth_fb) { depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); + } else { + texture_cache.SetEmptyDepthBuffer(); } UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); @@ -489,38 +495,41 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( if (using_color_fb) { if (single_color_target) { // Used when just a single color attachment is enabled, e.g. for clearing a color buffer - CachedSurfaceView* color_surface{ + View color_surface{ texture_cache.GetColorBufferSurface(*single_color_target, preserve_contents)}; if (color_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even if // the shader doesn't actually write to it. 
- color_surface->MarkAsModified(true); + texture_cache.MarkColorBufferInUse(*single_color_target); // Workaround for and issue in nvidia drivers // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - state.framebuffer_srgb.enabled |= - color_surface->GetSurfaceParams().GetSrgbConversion(); + state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; } fbkey.is_single_buffer = true; fbkey.color_attachments[0] = GL_COLOR_ATTACHMENT0 + static_cast(*single_color_target); fbkey.colors[0] = color_surface; + for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + if (index != *single_color_target) { + texture_cache.SetEmptyColorBuffer(index); + } + } } else { // Multiple color attachments are enabled for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - CachedSurfaceView* color_surface{ - texture_cache.GetColorBufferSurface(index, preserve_contents)}; + View color_surface{texture_cache.GetColorBufferSurface(index, preserve_contents)}; if (color_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even // if the shader doesn't actually write to it. - color_surface->MarkAsModified(true); + texture_cache.MarkColorBufferInUse(index); // Enable sRGB only for supported formats // Workaround for and issue in nvidia drivers // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ state.framebuffer_srgb.enabled |= - color_surface->GetSurfaceParams().GetSrgbConversion(); + color_surface->GetSurfaceParams().srgb_conversion; } fbkey.color_attachments[index] = @@ -538,11 +547,11 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( if (depth_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even if // the shader doesn't actually write to it. 
- depth_surface->MarkAsModified(true); + texture_cache.MarkDepthBufferInUse(); fbkey.zeta = depth_surface; - fbkey.stencil_enable = regs.stencil_enable && depth_surface->GetSurfaceParams().GetType() == - SurfaceType::DepthStencil; + fbkey.stencil_enable = regs.stencil_enable && + depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; } SetupCachedFramebuffer(fbkey, current_state); @@ -728,11 +737,27 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { buffer_cache.InvalidateRegion(addr, size); } +void RasterizerOpenGL::InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + if (!addr || !size) { + return; + } + texture_cache.InvalidateRegionEx(gpu_addr, size); + shader_cache.InvalidateRegion(addr, size); + global_cache.InvalidateRegion(addr, size); + buffer_cache.InvalidateRegion(addr, size); +} + void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { FlushRegion(addr, size); InvalidateRegion(addr, size); } +void RasterizerOpenGL::FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { + FlushRegion(addr, size); + InvalidateRegionEx(gpu_addr, addr, size); +} + bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Common::Rectangle& src_rect, @@ -740,7 +765,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs MICROPROFILE_SCOPE(OpenGL_Blits); const auto src_surface{texture_cache.GetFermiSurface(src)}; const auto dst_surface{texture_cache.GetFermiSurface(dst)}; - blitter.Blit(src_surface, dst_surface, src_rect, dst_rect); + // blitter.Blit(src_surface, dst_surface, src_rect, dst_rect); return true; } @@ -762,10 +787,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, const auto& params{surface->GetSurfaceParams()}; const auto& pixel_format{ 
VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; - ASSERT_MSG(params.GetWidth() == config.width, "Framebuffer width is different"); - ASSERT_MSG(params.GetHeight() == config.height, "Framebuffer height is different"); + ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); + ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); - if (params.GetPixelFormat() != pixel_format) { + if (params.pixel_format != pixel_format) { LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); } @@ -860,10 +885,10 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); - if (const auto surface{texture_cache.GetTextureSurface(texture)}; surface) { - state.texture_units[current_bindpoint].texture = surface->GetTexture( - entry.GetType(), entry.IsArray(), texture.tic.x_source, texture.tic.y_source, - texture.tic.z_source, texture.tic.w_source); + if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) { + view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, + texture.tic.w_source); + state.texture_units[current_bindpoint].texture = view->GetTexture(); } else { // Can occur when texture addr is null or its memory is unmapped/invalid state.texture_units[current_bindpoint].texture = 0; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index f7c2f46aa..871608f6d 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -4,7 +4,9 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/microprofile.h" #include "common/scope_exit.h" +#include "core/core.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include 
"video_core/renderer_opengl/gl_texture_cache.h" @@ -22,6 +24,9 @@ using VideoCore::Surface::ComponentType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; +MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); + namespace { struct FormatTuple { @@ -129,8 +134,8 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType compon return format; } -GLenum GetTextureTarget(const SurfaceParams& params) { - switch (params.GetTarget()) { +GLenum GetTextureTarget(const SurfaceTarget& target) { + switch (target) { case SurfaceTarget::Texture1D: return GL_TEXTURE_1D; case SurfaceTarget::Texture2D: @@ -175,8 +180,8 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.GetNumLevels() - 1); - if (params.GetNumLevels() == 1) { + glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1); + if (params.num_levels == 1) { glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); } } @@ -185,21 +190,20 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte OGLTexture texture; texture.Create(target); - switch (params.GetTarget()) { + switch (params.target) { case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.GetNumLevels(), internal_format, - params.GetWidth()); + glTextureStorage1D(texture.handle, params.num_levels, internal_format, params.width); break; case SurfaceTarget::Texture2D: case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.GetNumLevels(), internal_format, - params.GetWidth(), params.GetHeight()); + 
glTextureStorage2D(texture.handle, params.num_levels, internal_format, params.width, + params.height); break; case SurfaceTarget::Texture3D: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.GetNumLevels(), internal_format, - params.GetWidth(), params.GetHeight(), params.GetDepth()); + glTextureStorage3D(texture.handle, params.num_levels, internal_format, params.width, + params.height, params.depth); break; default: UNREACHABLE(); @@ -212,54 +216,72 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte } // Anonymous namespace -CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params) - : VideoCommon::SurfaceBase{texture_cache, params} { - const auto& tuple{GetFormatTuple(params.GetPixelFormat(), params.GetComponentType())}; +CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) + : VideoCommon::SurfaceBase(gpu_addr, params) { + const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)}; internal_format = tuple.internal_format; format = tuple.format; type = tuple.type; is_compressed = tuple.compressed; - target = GetTextureTarget(params); + target = GetTextureTarget(params.target); texture = CreateTexture(params, target, internal_format); + DecorateSurfaceName(); + ViewParams main{}; + main.num_levels = params.num_levels; + main.base_level = 0; + main.base_layer = 0; + main.num_layers = params.is_layered ? 
params.depth : 1; + main.target = params.target; + main_view = CreateView(main); + main_view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); +} + +CachedSurface::~CachedSurface() { + views.clear(); + main_view = nullptr; } -CachedSurface::~CachedSurface() = default; +void CachedSurface::DownloadTexture(std::vector& staging_buffer) { + LOG_CRITICAL(Render_OpenGL, "Flushing"); + MICROPROFILE_SCOPE(OpenGL_Texture_Download); -void CachedSurface::DownloadTexture() { // TODO(Rodrigo): Optimize alignment glPixelStorei(GL_PACK_ALIGNMENT, 1); SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { + for (u32 level = 0; level < params.num_levels; ++level) { glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); + const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); if (is_compressed) { glGetCompressedTextureImage(texture.handle, level, static_cast(params.GetHostMipmapSize(level)), - GetStagingBufferLevelData(level)); + staging_buffer.data() + mip_offset); } else { glGetTextureImage(texture.handle, level, format, type, static_cast(params.GetHostMipmapSize(level)), - GetStagingBufferLevelData(level)); + staging_buffer.data() + mip_offset); } } } -void CachedSurface::UploadTexture() { +void CachedSurface::UploadTexture(std::vector& staging_buffer) { + MICROPROFILE_SCOPE(OpenGL_Texture_Upload); SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - UploadTextureMipmap(level); + for (u32 level = 0; level < params.num_levels; ++level) { + UploadTextureMipmap(level, staging_buffer); } } -void CachedSurface::UploadTextureMipmap(u32 level) { +void CachedSurface::UploadTextureMipmap(u32 level, std::vector& staging_buffer) { // TODO(Rodrigo): Optimize alignment glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - u8* 
buffer{GetStagingBufferLevelData(level)}; + const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); + u8* buffer{staging_buffer.data() + mip_offset}; if (is_compressed) { const auto image_size{static_cast(params.GetHostMipmapSize(level))}; - switch (params.GetTarget()) { + switch (params.target) { case SurfaceTarget::Texture2D: glCompressedTextureSubImage2D(texture.handle, level, 0, 0, static_cast(params.GetMipWidth(level)), @@ -277,7 +299,7 @@ void CachedSurface::UploadTextureMipmap(u32 level) { break; case SurfaceTarget::TextureCubemap: { const std::size_t layer_size{params.GetHostLayerSize(level)}; - for (std::size_t face = 0; face < params.GetDepth(); ++face) { + for (std::size_t face = 0; face < params.depth; ++face) { glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), static_cast(params.GetMipWidth(level)), static_cast(params.GetMipHeight(level)), 1, @@ -291,7 +313,7 @@ void CachedSurface::UploadTextureMipmap(u32 level) { UNREACHABLE(); } } else { - switch (params.GetTarget()) { + switch (params.target) { case SurfaceTarget::Texture1D: glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, buffer); @@ -310,7 +332,7 @@ void CachedSurface::UploadTextureMipmap(u32 level) { static_cast(params.GetMipDepth(level)), format, type, buffer); break; case SurfaceTarget::TextureCubemap: - for (std::size_t face = 0; face < params.GetDepth(); ++face) { + for (std::size_t face = 0; face < params.depth; ++face) { glTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), params.GetMipWidth(level), params.GetMipHeight(level), 1, format, type, buffer); @@ -324,61 +346,57 @@ void CachedSurface::UploadTextureMipmap(u32 level) { } void CachedSurface::DecorateSurfaceName() { - LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), - params.GetTarget() == SurfaceTarget::Texture3D ? 
"3D" : ""); + LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); +} + +void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { + LabelGLObject(GL_TEXTURE, texture_view.texture.handle, gpu_addr, prefix); } -std::unique_ptr CachedSurface::CreateView(const ViewKey& view_key) { - return std::make_unique(*this, view_key); +View CachedSurface::CreateView(const ViewParams& view_key) { + auto view = std::make_shared(*this, view_key); + views[view_key] = view; + view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); + return view; } -CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, ViewKey key) - : surface{surface}, key{key}, params{surface.GetSurfaceParams()} {} +CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params) + : VideoCommon::ViewBase(params), surface{surface} { + target = GetTextureTarget(params.target); + texture_view = CreateTextureView(); +} CachedSurfaceView::~CachedSurfaceView() = default; void CachedSurfaceView::Attach(GLenum attachment) const { - ASSERT(key.num_layers == 1 && key.num_levels == 1); + ASSERT(params.num_layers == 1 && params.num_levels == 1); - switch (params.GetTarget()) { + switch (params.target) { case SurfaceTarget::Texture1D: - glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), key.base_level); + glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, target, + surface.GetTexture(), params.base_level); break; case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), key.base_level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, target, + surface.GetTexture(), params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - 
glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTexture(), - key.base_level, key.base_layer); + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, target, + params.base_level, params.base_layer); break; default: UNIMPLEMENTED(); } } -GLuint CachedSurfaceView::GetTexture(Tegra::Shader::TextureType texture_type, bool is_array, - SwizzleSource x_source, SwizzleSource y_source, +void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, SwizzleSource w_source) { - const auto [texture_view, target] = GetTextureView(texture_type, is_array); - if (texture_view.get().texture.handle == 0) { - texture_view.get() = std::move(CreateTextureView(target)); - } - ApplySwizzle(texture_view, x_source, y_source, z_source, w_source); - return texture_view.get().texture.handle; -} - -void CachedSurfaceView::ApplySwizzle(TextureView& texture_view, SwizzleSource x_source, - SwizzleSource y_source, SwizzleSource z_source, - SwizzleSource w_source) { - const std::array swizzle = {x_source, y_source, z_source, w_source}; - if (swizzle == texture_view.swizzle) { + u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); + if (swizzle == texture_view.swizzle) return; - } const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; @@ -386,38 +404,25 @@ void CachedSurfaceView::ApplySwizzle(TextureView& texture_view, SwizzleSource x_ texture_view.swizzle = swizzle; } -CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView(GLenum target) const { +CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView() const { + const auto& owner_params = surface.GetSurfaceParams(); TextureView texture_view; - glGenTextures(1, &texture_view.texture.handle); + texture_view.texture.Create(); const GLuint handle{texture_view.texture.handle}; - const FormatTuple& tuple{GetFormatTuple(params.GetPixelFormat(), 
params.GetComponentType())}; + const FormatTuple& tuple{ + GetFormatTuple(owner_params.pixel_format, owner_params.component_type)}; - glTextureView(handle, target, surface.texture.handle, tuple.internal_format, key.base_level, - key.num_levels, key.base_layer, key.num_layers); - ApplyTextureDefaults(params, handle); + glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level, + params.num_levels, params.base_layer, params.num_layers); - return texture_view; -} + ApplyTextureDefaults(owner_params, handle); -std::pair, GLenum> -CachedSurfaceView::GetTextureView(Tegra::Shader::TextureType texture_type, bool is_array) { - using Pair = std::pair, GLenum>; - switch (texture_type) { - case Tegra::Shader::TextureType::Texture1D: - return is_array ? Pair{texture_view_1d_array, GL_TEXTURE_1D_ARRAY} - : Pair{texture_view_1d, GL_TEXTURE_1D}; - case Tegra::Shader::TextureType::Texture2D: - return is_array ? Pair{texture_view_2d_array, GL_TEXTURE_2D_ARRAY} - : Pair{texture_view_2d, GL_TEXTURE_2D}; - case Tegra::Shader::TextureType::Texture3D: - ASSERT(!is_array); - return {texture_view_3d, GL_TEXTURE_3D}; - case Tegra::Shader::TextureType::TextureCube: - return is_array ? 
Pair{texture_view_cube_array, GL_TEXTURE_CUBE_MAP_ARRAY} - : Pair{texture_view_cube, GL_TEXTURE_CUBE_MAP}; - } - UNREACHABLE(); + u32 swizzle = + EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); + texture_view.swizzle = swizzle; + + return texture_view; } TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, @@ -426,106 +431,21 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, TextureCacheOpenGL::~TextureCacheOpenGL() = default; -CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr, - const SurfaceParams& new_params, - bool preserve_contents, - const std::vector& overlaps) { - if (overlaps.size() > 1) { - return TryCopyAsViews(gpu_addr, cpu_addr, host_ptr, new_params, overlaps); - } - - const auto& old_surface{overlaps[0]}; - const auto& old_params{old_surface->GetSurfaceParams()}; - if (old_params.GetTarget() == new_params.GetTarget() && - old_params.GetDepth() == new_params.GetDepth() && old_params.GetDepth() == 1 && - old_params.GetNumLevels() == new_params.GetNumLevels() && - old_params.GetPixelFormat() == new_params.GetPixelFormat()) { - return SurfaceCopy(gpu_addr, cpu_addr, host_ptr, new_params, old_surface, old_params); - } - - return nullptr; -} - -CachedSurfaceView* TextureCacheOpenGL::SurfaceCopy(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, - const Surface& old_surface, - const SurfaceParams& old_params) { - const auto new_surface{GetUncachedSurface(new_params)}; - Register(new_surface, gpu_addr, cpu_addr, host_ptr); - - const u32 min_width{ - std::max(old_params.GetDefaultBlockWidth(), new_params.GetDefaultBlockWidth())}; - const u32 min_height{ - std::max(old_params.GetDefaultBlockHeight(), new_params.GetDefaultBlockHeight())}; - for (u32 level = 0; level < old_params.GetNumLevels(); ++level) { - const u32 width{std::min(old_params.GetMipWidth(level), new_params.GetMipWidth(level))}; - const u32 
height{std::min(old_params.GetMipHeight(level), new_params.GetMipHeight(level))}; - if (width < min_width || height < min_height) { - // Avoid copies that are too small to be handled in OpenGL - break; - } - glCopyImageSubData(old_surface->GetTexture(), old_surface->GetTarget(), level, 0, 0, 0, - new_surface->GetTexture(), new_surface->GetTarget(), level, 0, 0, 0, - width, height, 1); - } - - new_surface->MarkAsModified(true); - - // TODO(Rodrigo): Add an entry to directly get the superview - return new_surface->GetView(gpu_addr, new_params); -} - -CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr, const SurfaceParams& new_params, - const std::vector& overlaps) { - if (new_params.GetTarget() == SurfaceTarget::Texture1D || - new_params.GetTarget() == SurfaceTarget::Texture1DArray || - new_params.GetTarget() == SurfaceTarget::Texture3D) { - // Non-2D textures are not handled at the moment in this fast path. - return nullptr; - } - - const auto new_surface{GetUncachedSurface(new_params)}; - // TODO(Rodrigo): Move this down - Register(new_surface, gpu_addr, cpu_addr, host_ptr); - - // TODO(Rodrigo): Find a way to avoid heap allocations here. - std::vector views; - views.reserve(overlaps.size()); - for (const auto& overlap : overlaps) { - const auto view{ - new_surface->TryGetView(overlap->GetGpuAddr(), overlap->GetSurfaceParams())}; - if (!view) { - // TODO(Rodrigo): Remove this - Unregister(new_surface); - return nullptr; - } - views.push_back(view); - } - - // TODO(Rodrigo): It's possible that these method leaves some unloaded textures if the data has - // been uploaded to guest memory but not used as a surface previously. 
- for (std::size_t i = 0; i < overlaps.size(); ++i) { - const auto& overlap{overlaps[i]}; - const auto& view{views[i]}; - for (u32 overlap_level = 0; overlap_level < view->GetNumLevels(); ++overlap_level) { - const u32 super_level{view->GetBaseLevel() + overlap_level}; - glCopyImageSubData(overlap->GetTexture(), overlap->GetTarget(), overlap_level, 0, 0, 0, - new_surface->GetTexture(), new_surface->GetTarget(), super_level, 0, - 0, view->GetBaseLayer(), view->GetWidth(), view->GetHeight(), - view->GetNumLayers()); - } - } - - new_surface->MarkAsModified(true); - - // TODO(Rodrigo): Add an entry to directly get the superview - return new_surface->GetView(gpu_addr, new_params); +Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { + return std::make_shared(gpu_addr, params); } -Surface TextureCacheOpenGL::CreateSurface(const SurfaceParams& params) { - return std::make_unique(*this, params); +void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, + const VideoCommon::CopyParams& copy_params) { + const auto src_handle = src_surface->GetTexture(); + const auto src_target = src_surface->GetTarget(); + const auto dst_handle = dst_surface->GetTexture(); + const auto dst_target = dst_surface->GetTarget(); + glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x, + copy_params.source_y, copy_params.source_z, dst_handle, dst_target, + copy_params.dest_level, copy_params.dest_x, copy_params.dest_y, + copy_params.dest_z, copy_params.width, copy_params.height, + copy_params.depth); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index c65e37153..1722c1bbc 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -19,24 +19,25 @@ namespace OpenGL { using VideoCommon::SurfaceParams; -using VideoCommon::ViewKey; +using 
VideoCommon::ViewParams; class CachedSurfaceView; class CachedSurface; class TextureCacheOpenGL; using Surface = std::shared_ptr; -using TextureCacheBase = VideoCommon::TextureCache; +using View = std::shared_ptr; +using TextureCacheBase = VideoCommon::TextureCache; -class CachedSurface final : public VideoCommon::SurfaceBase { +class CachedSurface final : public VideoCommon::SurfaceBase { friend CachedSurfaceView; public: - explicit CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params); + explicit CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params); ~CachedSurface(); - void UploadTexture(); - void DownloadTexture(); + void UploadTexture(std::vector& staging_buffer) override; + void DownloadTexture(std::vector& staging_buffer) override; GLenum GetTarget() const { return target; @@ -49,99 +50,79 @@ public: protected: void DecorateSurfaceName(); - std::unique_ptr CreateView(const ViewKey& view_key); + View CreateView(const ViewParams& view_key) override; private: - void UploadTextureMipmap(u32 level); + void UploadTextureMipmap(u32 level, std::vector& staging_buffer); GLenum internal_format{}; GLenum format{}; GLenum type{}; bool is_compressed{}; GLenum target{}; + u32 view_count{}; OGLTexture texture; }; -class CachedSurfaceView final { +class CachedSurfaceView final : public VideoCommon::ViewBase { public: - explicit CachedSurfaceView(CachedSurface& surface, ViewKey key); + explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params); ~CachedSurfaceView(); /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER void Attach(GLenum attachment) const; - GLuint GetTexture(Tegra::Shader::TextureType texture_type, bool is_array, - Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); - - void MarkAsModified(bool is_modified) { - surface.MarkAsModified(is_modified); + GLuint GetTexture() { 
+ return texture_view.texture.handle; } const SurfaceParams& GetSurfaceParams() const { - return params; + return surface.GetSurfaceParams(); } u32 GetWidth() const { - return params.GetMipWidth(GetBaseLevel()); + const auto owner_params = GetSurfaceParams(); + return owner_params.GetMipWidth(params.base_level); } u32 GetHeight() const { - return params.GetMipHeight(GetBaseLevel()); + const auto owner_params = GetSurfaceParams(); + return owner_params.GetMipHeight(params.base_level); } u32 GetDepth() const { - return params.GetMipDepth(GetBaseLevel()); - } - - u32 GetBaseLayer() const { - return key.base_layer; + const auto owner_params = GetSurfaceParams(); + return owner_params.GetMipDepth(params.base_level); } - u32 GetNumLayers() const { - return key.num_layers; - } - - u32 GetBaseLevel() const { - return key.base_level; - } + void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source); - u32 GetNumLevels() const { - return key.num_levels; - } + void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); private: struct TextureView { - OGLTexture texture; - std::array swizzle{ - Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, - Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A}; + OGLTextureView texture; + u32 swizzle; }; - void ApplySwizzle(TextureView& texture_view, Tegra::Texture::SwizzleSource x_source, + u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); - - TextureView CreateTextureView(GLenum target) const; + Tegra::Texture::SwizzleSource w_source) const { + return (static_cast(x_source) << 24) | (static_cast(y_source) << 16) | + (static_cast(z_source) << 8) | static_cast(w_source); + } - std::pair, GLenum> GetTextureView( - Tegra::Shader::TextureType texture_type, 
bool is_array); + TextureView CreateTextureView() const; CachedSurface& surface; - const ViewKey key; - const SurfaceParams params; - - TextureView texture_view_1d; - TextureView texture_view_1d_array; - TextureView texture_view_2d; - TextureView texture_view_2d_array; - TextureView texture_view_3d; - TextureView texture_view_cube; - TextureView texture_view_cube_array; + GLenum target{}; + + TextureView texture_view; }; class TextureCacheOpenGL final : public TextureCacheBase { @@ -150,21 +131,9 @@ public: ~TextureCacheOpenGL(); protected: - CachedSurfaceView* TryFastGetSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, - bool preserve_contents, - const std::vector& overlaps); - - Surface CreateSurface(const SurfaceParams& params); - -private: - CachedSurfaceView* SurfaceCopy(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, const Surface& old_surface, - const SurfaceParams& old_params); - - CachedSurfaceView* TryCopyAsViews(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, - const std::vector& overlaps); + Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; + void ImageCopy(Surface src_surface, Surface dst_surface, + const VideoCommon::CopyParams& copy_params) override; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 5994c0c61..a9fa539a5 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -56,8 +56,7 @@ SurfaceBlitter::SurfaceBlitter() { SurfaceBlitter::~SurfaceBlitter() = default; -void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, - const Common::Rectangle& src_rect, +void SurfaceBlitter::Blit(View src, View dst, const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) const { const auto& src_params{src->GetSurfaceParams()}; const auto& 
dst_params{dst->GetSurfaceParams()}; @@ -72,17 +71,13 @@ void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, u32 buffers{}; - UNIMPLEMENTED_IF(src_params.GetTarget() != SurfaceTarget::Texture2D); - UNIMPLEMENTED_IF(dst_params.GetTarget() != SurfaceTarget::Texture2D); + UNIMPLEMENTED_IF(src_params.target != SurfaceTarget::Texture2D); + UNIMPLEMENTED_IF(dst_params.target != SurfaceTarget::Texture2D); - const auto GetTexture = [](CachedSurfaceView* view) { - return view->GetTexture(TextureType::Texture2D, false, SwizzleSource::R, SwizzleSource::G, - SwizzleSource::B, SwizzleSource::A); - }; - const GLuint src_texture{GetTexture(src)}; - const GLuint dst_texture{GetTexture(dst)}; + const GLuint src_texture{src->GetTexture()}; + const GLuint dst_texture{dst->GetTexture()}; - if (src_params.GetType() == SurfaceType::ColorTexture) { + if (src_params.type == SurfaceType::ColorTexture) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_texture, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, @@ -94,7 +89,7 @@ void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, 0); buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.GetType() == SurfaceType::Depth) { + } else if (src_params.type == SurfaceType::Depth) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_texture, 0); @@ -106,7 +101,7 @@ void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.GetType() == SurfaceType::DepthStencil) { + } else if (src_params.type == SurfaceType::DepthStencil) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); 
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_texture, 0); @@ -148,4 +143,4 @@ void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_vie glObjectLabel(identifier, handle, -1, static_cast(object_label.c_str())); } -} // namespace OpenGL \ No newline at end of file +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index e7726d14e..8977d2383 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -39,8 +39,8 @@ public: explicit SurfaceBlitter(); ~SurfaceBlitter(); - void Blit(CachedSurfaceView* src, CachedSurfaceView* dst, - const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) const; + void Blit(View src, View dst, const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) const; private: OGLFramebuffer src_framebuffer; @@ -49,4 +49,4 @@ private: void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); -} // namespace OpenGL \ No newline at end of file +} // namespace OpenGL diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c5c01957a..eb0d9bc10 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -22,6 +22,7 @@ #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" +#include "video_core/texture_cache/copy_params.h" #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" @@ -40,32 +41,42 @@ class RasterizerInterface; namespace VideoCommon { +using VideoCore::Surface::SurfaceTarget; +using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; + template class TextureCache { - using IntervalMap = boost::icl::interval_map>>; + 
using IntervalMap = boost::icl::interval_map>; using IntervalType = typename IntervalMap::interval_type; public: + void InitMemoryMananger(Tegra::MemoryManager& memory_manager) { + this->memory_manager = &memory_manager; + } + void InvalidateRegion(CacheAddr addr, std::size_t size) { for (const auto& surface : GetSurfacesInRegion(addr, size)) { - if (!surface->IsRegistered()) { - // Skip duplicates - continue; - } Unregister(surface); } } - TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { + void InvalidateRegionEx(GPUVAddr addr, std::size_t size) { + for (const auto& surface : GetSurfacesInRegionInner(addr, size)) { + Unregister(surface); + } + } + + TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, + const VideoCommon::Shader::Sampler& entry) { const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { return {}; } - const auto params{SurfaceParams::CreateForTexture(system, config)}; - return GetSurfaceView(gpu_addr, params, true); + const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; + return GetSurface(gpu_addr, params, true).second; } - TView* GetDepthBufferSurface(bool preserve_contents) { + TView GetDepthBufferSurface(bool preserve_contents) { const auto& regs{system.GPU().Maxwell3D().regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { @@ -75,36 +86,76 @@ public: system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - return GetSurfaceView(gpu_addr, depth_params, preserve_contents); + auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); + if (depth_buffer.target) + depth_buffer.target->MarkAsProtected(false); + depth_buffer.target = surface_view.first; // adopt the surface just fetched, mirroring GetColorBufferSurface + if (depth_buffer.target) + depth_buffer.target->MarkAsProtected(true); + return surface_view.second; } - TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) {
+ TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); const auto& regs{system.GPU().Maxwell3D().regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { + SetEmptyColorBuffer(index); return {}; } - auto& memory_manager{system.GPU().MemoryManager()}; - const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; - const auto gpu_addr{config.Address() + - config.base_layer * config.layer_stride * sizeof(u32)}; + const auto& config{regs.rt[index]}; + const auto gpu_addr{config.Address()}; if (!gpu_addr) { + SetEmptyColorBuffer(index); return {}; } - return GetSurfaceView(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), - preserve_contents); + auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + preserve_contents); + if (render_targets[index].target) + render_targets[index].target->MarkAsProtected(false); + render_targets[index].target = surface_view.first; + if (render_targets[index].target) + render_targets[index].target->MarkAsProtected(true); + return surface_view.second; + } + + void MarkColorBufferInUse(std::size_t index) { + if (render_targets[index].target) + render_targets[index].target->MarkAsModified(true, Tick()); } - TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return GetSurfaceView(config.Address(), SurfaceParams::CreateForFermiCopySurface(config), - true); + void MarkDepthBufferInUse() { + if (depth_buffer.target) + depth_buffer.target->MarkAsModified(true, Tick()); } - std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { + void SetEmptyDepthBuffer() { + if (depth_buffer.target != nullptr) { + depth_buffer.target->MarkAsProtected(false); + depth_buffer.target = nullptr; + depth_buffer.view = nullptr; + } + } + + void SetEmptyColorBuffer(std::size_t index) { + if 
(render_targets[index].target != nullptr) { + render_targets[index].target->MarkAsProtected(false); + std::memset(&render_targets[index].config, sizeof(RenderTargetConfig), 0); + render_targets[index].target = nullptr; + render_targets[index].view = nullptr; + } + } + + TView GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); + const GPUVAddr gpu_addr = config.Address(); + return GetSurface(gpu_addr, params, true).second; + } + + TSurface TryFindFramebufferSurface(const u8* host_ptr) const { const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; return it != registered_surfaces.end() ? *it->second.begin() : nullptr; } @@ -115,126 +165,334 @@ public: protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : system{system}, rasterizer{rasterizer} {} + : system{system}, rasterizer{rasterizer} { + for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { + SetEmptyColorBuffer(i); + } + SetEmptyDepthBuffer(); + } ~TextureCache() = default; - virtual TView* TryFastGetSurfaceView( - GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, const std::vector>& overlaps) = 0; + virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; - virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; + virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, + const CopyParams& copy_params) = 0; - void Register(std::shared_ptr surface, GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr) { - surface->Register(gpu_addr, cpu_addr, host_ptr); - registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); - rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); + void Register(TSurface surface) { + const GPUVAddr gpu_addr = surface->GetGpuAddr(); + u8* host_ptr = 
memory_manager->GetPointer(gpu_addr); + const std::size_t size = surface->GetSizeInBytes(); + const std::optional cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); + if (!host_ptr || !cpu_addr) { + LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", + gpu_addr); + return; + } + surface->SetHostPtr(host_ptr); + surface->SetCpuAddr(*cpu_addr); + registered_surfaces.add({GetInterval(host_ptr, size), {surface}}); + rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); + RegisterInnerCache(surface); + surface->MarkAsRegistered(true); } - void Unregister(std::shared_ptr surface) { - registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); - rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); - surface->Unregister(); + void Unregister(TSurface surface) { + if (surface->IsProtected()) + return; + const GPUVAddr gpu_addr = surface->GetGpuAddr(); + const void* host_ptr = surface->GetHostPtr(); + const std::size_t size = surface->GetSizeInBytes(); + const VAddr cpu_addr = surface->GetCpuAddr(); + registered_surfaces.erase(GetInterval(host_ptr, size)); + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + UnregisterInnerCache(surface); + surface->MarkAsRegistered(false); + ReserveSurface(surface->GetSurfaceParams(), surface); } - std::shared_ptr GetUncachedSurface(const SurfaceParams& params) { - if (const auto surface = TryGetReservedSurface(params); surface) + TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { + if (const auto surface = TryGetReservedSurface(params); surface) { + surface->SetGpuAddr(gpu_addr); return surface; + } // No reserved surface available, create a new one and reserve it - auto new_surface{CreateSurface(params)}; - ReserveSurface(params, new_surface); + auto new_surface{CreateSurface(gpu_addr, params)}; return new_surface; } Core::System& system; private: - TView* GetSurfaceView(GPUVAddr gpu_addr, const SurfaceParams& 
params, bool preserve_contents) { - auto& memory_manager{system.GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; - DEBUG_ASSERT(cpu_addr); - - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; - if (overlaps.empty()) { - return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + enum class RecycleStrategy : u32 { + Ignore = 0, + Flush = 1, + BufferCopy = 3, + }; + + RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, + const GPUVAddr gpu_addr, const bool untopological) { + // Untopological decision + if (untopological) { + return RecycleStrategy::Ignore; + } + // 3D Textures decision + if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { + return RecycleStrategy::Flush; } + for (auto s : overlaps) { + const auto& s_params = s->GetSurfaceParams(); + if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { + return RecycleStrategy::Flush; + } + } + return RecycleStrategy::Ignore; + } - if (overlaps.size() == 1) { - if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { - return view; + std::pair RecycleSurface(std::vector& overlaps, + const SurfaceParams& params, const GPUVAddr gpu_addr, + const u8* host_ptr, const bool preserve_contents, + const bool untopological) { + for (auto surface : overlaps) { + Unregister(surface); + } + RecycleStrategy strategy = !Settings::values.use_accurate_gpu_emulation + ? 
PickStrategy(overlaps, params, gpu_addr, untopological) + : RecycleStrategy::Flush; + switch (strategy) { + case RecycleStrategy::Ignore: { + return InitializeSurface(gpu_addr, params, preserve_contents); + } + case RecycleStrategy::Flush: { + std::sort(overlaps.begin(), overlaps.end(), + [](const TSurface& a, const TSurface& b) -> bool { + return a->GetModificationTick() < b->GetModificationTick(); + }); + for (auto surface : overlaps) { + FlushSurface(surface); } + return InitializeSurface(gpu_addr, params, preserve_contents); } + default: { + UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); + return InitializeSurface(gpu_addr, params, preserve_contents); + } + } + } - const auto fast_view{TryFastGetSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, - preserve_contents, overlaps)}; + std::pair RebuildMirage(TSurface current_surface, + const SurfaceParams& params) { + const auto gpu_addr = current_surface->GetGpuAddr(); + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + std::vector bricks = current_surface->BreakDown(); + for (auto& brick : bricks) { + ImageCopy(current_surface, new_surface, brick); + } + Unregister(current_surface); + Register(new_surface); + return {new_surface, new_surface->GetMainView()}; + } - if (!fast_view) { - std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { - return lhs->GetModificationTick() < rhs->GetModificationTick(); - }); + std::pair ManageStructuralMatch(TSurface current_surface, + const SurfaceParams& params) { + const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + if (is_mirage) { + return RebuildMirage(current_surface, params); } + const bool matches_target = current_surface->MatchTarget(params.target); + if (matches_target) { + return {current_surface, current_surface->GetMainView()}; + } + return {current_surface, current_surface->EmplaceOverview(params)}; + } - for (const auto& surface : overlaps) { - if (!fast_view) { - // Flush even 
when we don't care about the contents, to preserve memory not - // written by the new surface. - FlushSurface(surface); + std::optional> ReconstructSurface(std::vector& overlaps, + const SurfaceParams& params, + const GPUVAddr gpu_addr, + const u8* host_ptr) { + if (!params.is_layered || params.target == SurfaceTarget::Texture3D) { + return {}; + } + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + for (auto surface : overlaps) { + const SurfaceParams& src_params = surface->GetSurfaceParams(); + if (src_params.is_layered || src_params.num_levels > 1) { + // We send this cases to recycle as they are more complex to handle + return {}; + } + const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); + auto mipmap_layer = new_surface->GetLayerMipmap(surface->GetGpuAddr()); + if (!mipmap_layer) { + return {}; } + const u32 layer = (*mipmap_layer).first; + const u32 mipmap = (*mipmap_layer).second; + if (new_surface->GetMipmapSize(mipmap) != candidate_size) { + return {}; + } + // Now we got all the data set up + CopyParams copy_params{}; + const u32 dst_width = params.GetMipWidth(mipmap); + const u32 dst_height = params.GetMipHeight(mipmap); + copy_params.width = std::min(src_params.width, dst_width); + copy_params.height = std::min(src_params.height, dst_height); + copy_params.depth = 1; + copy_params.source_level = 0; + copy_params.dest_level = mipmap; + copy_params.source_z = 0; + copy_params.dest_z = layer; + ImageCopy(surface, new_surface, copy_params); + } + for (auto surface : overlaps) { Unregister(surface); } - if (fast_view) { - return fast_view; + Register(new_surface); + return {{new_surface, new_surface->GetMainView()}}; + } + + std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, + bool preserve_contents) { + + const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; + const auto cache_addr{ToCacheAddr(host_ptr)}; + const std::size_t candidate_size = params.GetGuestSizeInBytes(); + auto 
overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)}; + if (overlaps.empty()) { + return InitializeSurface(gpu_addr, params, preserve_contents); + } + + for (auto surface : overlaps) { + if (!surface->MatchesTopology(params)) { + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + true); + } } - return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + if (overlaps.size() == 1) { + TSurface current_surface = overlaps[0]; + if (current_surface->MatchesStructure(params) && + current_surface->GetGpuAddr() == gpu_addr && + (params.target != SurfaceTarget::Texture3D || + current_surface->MatchTarget(params.target))) { + return ManageStructuralMatch(current_surface, params); + } + if (current_surface->GetSizeInBytes() <= candidate_size) { + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + false); + } + std::optional view = current_surface->EmplaceView(params, gpu_addr); + if (view.has_value()) { + const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + if (is_mirage) { + LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + false); + } + return {current_surface, *view}; + } + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + } else { + std::optional> view = + ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + if (view.has_value()) { + return *view; + } + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + } } - TView* LoadSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents) { - const auto new_surface{GetUncachedSurface(params)}; - Register(new_surface, gpu_addr, cpu_addr, host_ptr); + std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, + bool preserve_contents) { + auto new_surface{GetUncachedSurface(gpu_addr, 
params)}; + Register(new_surface); if (preserve_contents) { LoadSurface(new_surface); } - return new_surface->GetView(gpu_addr, params); + return {new_surface, new_surface->GetMainView()}; } - void LoadSurface(const std::shared_ptr& surface) { - surface->LoadBuffer(); - surface->UploadTexture(); - surface->MarkAsModified(false); + void LoadSurface(const TSurface& surface) { + staging_buffer.resize(surface->GetHostSizeInBytes()); + surface->LoadBuffer(*memory_manager, staging_buffer); + surface->UploadTexture(staging_buffer); + surface->MarkAsModified(false, Tick()); } - void FlushSurface(const std::shared_ptr& surface) { + void FlushSurface(const TSurface& surface) { if (!surface->IsModified()) { return; } - surface->DownloadTexture(); - surface->FlushBuffer(); + staging_buffer.resize(surface->GetHostSizeInBytes()); + surface->DownloadTexture(staging_buffer); + surface->FlushBuffer(staging_buffer); + surface->MarkAsModified(false, Tick()); } - std::vector> GetSurfacesInRegion(CacheAddr cache_addr, - std::size_t size) const { + std::vector GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { if (size == 0) { return {}; } const IntervalType interval{cache_addr, cache_addr + size}; - std::vector> surfaces; + std::vector surfaces; for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { - surfaces.push_back(*pair.second.begin()); + for (auto& s : pair.second) { + if (!s || !s->IsRegistered()) { + continue; + } + surfaces.push_back(s); + } } return surfaces; } - void ReserveSurface(const SurfaceParams& params, std::shared_ptr surface) { + void RegisterInnerCache(TSurface& surface) { + GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; + const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + while (start <= end) { + inner_cache[start].push_back(surface); + start++; + } + } + + void UnregisterInnerCache(TSurface& surface) { + GPUVAddr start = surface->GetGpuAddr() >> 
inner_cache_page_bits; + const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + while (start <= end) { + inner_cache[start].remove(surface); + start++; + } + } + + std::vector GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) { + if (size == 0) { + return {}; + } + const GPUVAddr gpu_addr_end = gpu_addr + size; + GPUVAddr start = gpu_addr >> inner_cache_page_bits; + const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits; + std::vector surfaces; + while (start <= end) { + std::list& list = inner_cache[start]; + for (auto& s : list) { + if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) { + s->MarkAsPicked(true); + surfaces.push_back(s); + } + } + start++; + } + for (auto& s : surfaces) { + s->MarkAsPicked(false); + } + return surfaces; + } + + void ReserveSurface(const SurfaceParams& params, TSurface surface) { surface_reserve[params].push_back(std::move(surface)); } - std::shared_ptr TryGetReservedSurface(const SurfaceParams& params) { + TSurface TryGetReservedSurface(const SurfaceParams& params) { auto search{surface_reserve.find(params)}; if (search == surface_reserve.end()) { return {}; @@ -247,21 +505,41 @@ private: return {}; } - IntervalType GetSurfaceInterval(std::shared_ptr surface) const { - return IntervalType::right_open(surface->GetCacheAddr(), - surface->GetCacheAddr() + surface->GetSizeInBytes()); + IntervalType GetInterval(const void* host_ptr, const std::size_t size) const { + const CacheAddr addr = ToCacheAddr(host_ptr); + return IntervalType::right_open(addr, addr + size); } + struct RenderInfo { + RenderTargetConfig config; + TSurface target; + TView view; + }; + + struct DepthBufferInfo { + TSurface target; + TView view; + }; + VideoCore::RasterizerInterface& rasterizer; + Tegra::MemoryManager* memory_manager; u64 ticks{}; IntervalMap registered_surfaces; + static constexpr u64 inner_cache_page_bits{20}; + static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits}; + 
std::unordered_map> inner_cache; + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. - std::unordered_map>> surface_reserve; + std::unordered_map> surface_reserve; + std::array render_targets; + DepthBufferInfo depth_buffer; + + std::vector staging_buffer; }; } // namespace VideoCommon -- cgit v1.2.3 From b711cdce782ee604edc3c52628eb76e6b9a08b72 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 13:58:37 -0400 Subject: Corrections to Structural Matching The texture will now be reconstructed if the width only matches on GoB alignment. --- src/video_core/texture_cache/surface_base.h | 66 +++++++++++++++++++--------- src/video_core/texture_cache/texture_cache.h | 11 +++-- 2 files changed, 53 insertions(+), 24 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 5fd7add0a..9c048eb88 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -16,9 +16,8 @@ #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" -template> -ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp={}) -{ +template > +ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { // Note: BOTH type T and the type after ForwardIt is dereferenced // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. 
// This is stricter than lower_bound requirement (see above) @@ -33,8 +32,14 @@ class MemoryManager; namespace VideoCommon { -using VideoCore::Surface::SurfaceTarget; using VideoCore::MortonSwizzleMode; +using VideoCore::Surface::SurfaceTarget; + +enum class MatchStructureResult : u32 { + FullMatch = 0, + SemiMatch = 1, + None = 2, +}; class SurfaceBaseImpl { public: @@ -106,17 +111,26 @@ public: return std::tie(src_bpp, params.is_tiled) == std::tie(dst_bpp, rhs.is_tiled); } - bool MatchesStructure(const SurfaceParams& rhs) const { + MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { if (params.is_tiled) { - const u32 a_width1 = params.GetBlockAlignedWidth(); - const u32 a_width2 = rhs.GetBlockAlignedWidth(); - return std::tie(a_width1, params.height, params.depth, params.block_width, - params.block_height, params.block_depth, params.tile_width_spacing) == - std::tie(a_width2, rhs.height, rhs.depth, rhs.block_width, rhs.block_height, - rhs.block_depth, rhs.tile_width_spacing); + if (std::tie(params.height, params.depth, params.block_width, params.block_height, + params.block_depth, params.tile_width_spacing) == + std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, + rhs.tile_width_spacing)) { + if (params.width == rhs.width) { + return MatchStructureResult::FullMatch; + } + if (params.GetBlockAlignedWidth() == rhs.GetBlockAlignedWidth()) { + return MatchStructureResult::SemiMatch; + } + } + return MatchStructureResult::None; } else { - return std::tie(params.width, params.height, params.pitch) == - std::tie(rhs.width, rhs.height, rhs.pitch); + if (std::tie(params.width, params.height, params.pitch) == + std::tie(rhs.width, rhs.height, rhs.pitch)) { + return MatchStructureResult::FullMatch; + } + return MatchStructureResult::None; } } @@ -126,15 +140,16 @@ public: const GPUVAddr relative_address = candidate_gpu_addr - gpu_addr; const u32 layer = relative_address / layer_size; const GPUVAddr mipmap_address = 
relative_address - layer_size * layer; - const auto mipmap_it = binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + const auto mipmap_it = + binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); if (mipmap_it != mipmap_offsets.end()) { return {{layer, std::distance(mipmap_offsets.begin(), mipmap_it)}}; } return {}; } - std::vector BreakDown() const { - auto set_up_copy = [](CopyParams& cp, const SurfaceParams& params, const u32 depth, + std::vector BreakDown(const SurfaceParams& in_params) const { + auto set_up_copy = [](CopyParams& cp, const u32 width, const u32 height, const u32 depth, const u32 level) { cp.source_x = 0; cp.source_y = 0; @@ -144,8 +159,8 @@ public: cp.dest_z = 0; cp.source_level = level; cp.dest_level = level; - cp.width = params.GetMipWidth(level); - cp.height = params.GetMipHeight(level); + cp.width = width; + cp.height = height; cp.depth = depth; }; const u32 layers = params.depth; @@ -156,7 +171,11 @@ public: const u32 layer_offset = layer * mipmaps; for (std::size_t level = 0; level < mipmaps; level++) { CopyParams& cp = result[layer_offset + level]; - set_up_copy(cp, params, layer, level); + const u32 width = + std::min(params.GetMipWidth(level), in_params.GetMipWidth(level)); + const u32 height = + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level)); + set_up_copy(cp, width, height, layer, level); } } return result; @@ -164,7 +183,11 @@ public: std::vector result{mipmaps}; for (std::size_t level = 0; level < mipmaps; level++) { CopyParams& cp = result[level]; - set_up_copy(cp, params, params.GetMipDepth(level), level); + const u32 width = std::min(params.GetMipWidth(level), in_params.GetMipWidth(level)); + const u32 height = + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level)); + const u32 depth = std::min(params.GetMipDepth(level), in_params.GetMipDepth(level)); + set_up_copy(cp, width, height, depth, level); } return result; } @@ -254,7 +277,8 @@ public: 
std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { if (view_addr < gpu_addr) return {}; - if (params.target == SurfaceTarget::Texture3D || view_params.target == SurfaceTarget::Texture3D) { + if (params.target == SurfaceTarget::Texture3D || + view_params.target == SurfaceTarget::Texture3D) { return {}; } const std::size_t size = view_params.GetGuestSizeInBytes(); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index eb0d9bc10..f3b28453a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -284,7 +284,7 @@ private: const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); - std::vector bricks = current_surface->BreakDown(); + std::vector bricks = current_surface->BreakDown(params); for (auto& brick : bricks) { ImageCopy(current_surface, new_surface, brick); } @@ -370,11 +370,16 @@ private: if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; - if (current_surface->MatchesStructure(params) && + MatchStructureResult s_result = current_surface->MatchesStructure(params); + if (s_result != MatchStructureResult::None && current_surface->GetGpuAddr() == gpu_addr && (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { - return ManageStructuralMatch(current_surface, params); + if (s_result == MatchStructureResult::FullMatch) { + return ManageStructuralMatch(current_surface, params); + } else { + return RebuildMirage(current_surface, params); + } } if (current_surface->GetSizeInBytes() <= candidate_size) { return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, -- cgit v1.2.3 From d86f9cd70910d4b96ec301e7d532b11d18a290a4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 17:30:36 -0400 Subject: Change texture_cache chaching from GPUAddr to 
CacheAddr This also reverses the changes to make invalidation and flushing through the GPU address. --- src/video_core/memory_manager.cpp | 2 +- src/video_core/rasterizer_interface.h | 4 - src/video_core/renderer_opengl/gl_rasterizer.cpp | 16 ---- src/video_core/renderer_opengl/gl_rasterizer.h | 2 - src/video_core/texture_cache/surface_base.cpp | 5 +- src/video_core/texture_cache/surface_base.h | 30 +++---- src/video_core/texture_cache/texture_cache.h | 102 +++++++++-------------- 7 files changed, 60 insertions(+), 101 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 74a1441e3..5d8d126c1 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -69,7 +69,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { const u64 aligned_size{Common::AlignUp(size, page_size)}; const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; - rasterizer.FlushAndInvalidateRegionEx(gpu_addr, cache_addr, aligned_size); + rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); UnmapRange(gpu_addr, aligned_size); return gpu_addr; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index d5505ef9c..3c18d3b1f 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -49,10 +49,6 @@ public: /// and invalidated virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; - /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - /// and invalidated - virtual void FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) = 0; - /// Attempt to use a faster method to perform a surface copy virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp 
b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 482d0428c..77ac963b4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -737,27 +737,11 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { buffer_cache.InvalidateRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - if (!addr || !size) { - return; - } - texture_cache.InvalidateRegionEx(gpu_addr, size); - shader_cache.InvalidateRegion(addr, size); - global_cache.InvalidateRegion(addr, size); - buffer_cache.InvalidateRegion(addr, size); -} - void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { FlushRegion(addr, size); InvalidateRegion(addr, size); } -void RasterizerOpenGL::FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { - FlushRegion(addr, size); - InvalidateRegionEx(gpu_addr, addr, size); -} - bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Common::Rectangle& src_rect, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 971a38ab7..5c37d3bfa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -64,9 +64,7 @@ public: void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; - void InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size); void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Common::Rectangle& src_rect, diff --git 
a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 5273fcb44..0de0bc656 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -25,7 +25,6 @@ SurfaceBaseImpl::SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& u32 offset = 0; mipmap_offsets.resize(params.num_levels); mipmap_sizes.resize(params.num_levels); - gpu_addr_end = gpu_addr + memory_size; for (u32 i = 0; i < params.num_levels; i++) { mipmap_offsets[i] = offset; mipmap_sizes[i] = params.GetGuestMipmapSize(i); @@ -99,8 +98,10 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } -void SurfaceBaseImpl::FlushBuffer(std::vector& staging_buffer) { +void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, + std::vector& staging_buffer) { MICROPROFILE_SCOPE(GPU_Flush_Texture); + auto host_ptr = memory_manager.GetPointer(gpu_addr); if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 9c048eb88..74be3237d 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -45,40 +45,40 @@ class SurfaceBaseImpl { public: void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); - void FlushBuffer(std::vector& staging_buffer); + void FlushBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); GPUVAddr GetGpuAddr() const { return gpu_addr; } - GPUVAddr GetGpuAddrEnd() const { - return gpu_addr_end; - } - - bool Overlaps(const GPUVAddr start, const GPUVAddr end) const { - return (gpu_addr < end) && (gpu_addr_end > start); + bool Overlaps(const CacheAddr start, const CacheAddr end) const { + return (cache_addr < end) && (cache_addr_end > start); } // Use 
only when recycling a surface void SetGpuAddr(const GPUVAddr new_addr) { gpu_addr = new_addr; - gpu_addr_end = new_addr + memory_size; } VAddr GetCpuAddr() const { - return gpu_addr; + return cpu_addr; } void SetCpuAddr(const VAddr new_addr) { cpu_addr = new_addr; } - u8* GetHostPtr() const { - return host_ptr; + CacheAddr GetCacheAddr() const { + return cache_addr; + } + + CacheAddr GetCacheAddrEnd() const { + return cache_addr_end; } - void SetHostPtr(u8* new_addr) { - host_ptr = new_addr; + void SetCacheAddr(const CacheAddr new_addr) { + cache_addr = new_addr; + cache_addr_end = new_addr + memory_size; } const SurfaceParams& GetSurfaceParams() const { @@ -201,13 +201,13 @@ protected: const SurfaceParams params; GPUVAddr gpu_addr{}; - GPUVAddr gpu_addr_end{}; std::vector mipmap_sizes; std::vector mipmap_offsets; const std::size_t layer_size; const std::size_t memory_size; const std::size_t host_memory_size; - u8* host_ptr; + CacheAddr cache_addr; + CacheAddr cache_addr_end{}; VAddr cpu_addr; private: diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f3b28453a..43aaec011 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -60,12 +60,6 @@ public: } } - void InvalidateRegionEx(GPUVAddr addr, std::size_t size) { - for (const auto& surface : GetSurfacesInRegionInner(addr, size)) { - Unregister(surface); - } - } - TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { const auto gpu_addr{config.tic.Address()}; @@ -154,9 +148,19 @@ public: return GetSurface(gpu_addr, params, true).second; } - TSurface TryFindFramebufferSurface(const u8* host_ptr) const { - const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; - return it != registered_surfaces.end() ? 
*it->second.begin() : nullptr; + TSurface TryFindFramebufferSurface(const u8* host_ptr) { + const CacheAddr cache_addr = ToCacheAddr(host_ptr); + if (!cache_addr) { + return nullptr; + } + const CacheAddr page = cache_addr >> registry_page_bits; + std::list& list = registry[page]; + for (auto& s : list) { + if (s->GetCacheAddr() == cache_addr) { + return s; + } + } + return nullptr; } u64 Tick() { @@ -181,30 +185,28 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); - u8* host_ptr = memory_manager->GetPointer(gpu_addr); + const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); const std::optional cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); - if (!host_ptr || !cpu_addr) { + if (!cache_ptr || !cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); return; } - surface->SetHostPtr(host_ptr); + surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); - registered_surfaces.add({GetInterval(host_ptr, size), {surface}}); - rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); RegisterInnerCache(surface); surface->MarkAsRegistered(true); + rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } void Unregister(TSurface surface) { if (surface->IsProtected()) return; const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const void* host_ptr = surface->GetHostPtr(); + const CacheAddr cache_ptr = surface->GetCacheAddr(); const std::size_t size = surface->GetSizeInBytes(); const VAddr cpu_addr = surface->GetCpuAddr(); - registered_surfaces.erase(GetInterval(host_ptr, size)); rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); UnregisterInnerCache(surface); surface->MarkAsRegistered(false); @@ -280,7 +282,7 @@ private: } } - std::pair RebuildMirage(TSurface current_surface, + std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params) { const auto gpu_addr = 
current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); @@ -297,7 +299,7 @@ private: const SurfaceParams& params) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { - return RebuildMirage(current_surface, params); + return RebuildSurface(current_surface, params); } const bool matches_target = current_surface->MatchTarget(params.target); if (matches_target) { @@ -356,7 +358,7 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)}; + auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -378,7 +380,7 @@ private: if (s_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params); } else { - return RebuildMirage(current_surface, params); + return RebuildSurface(current_surface, params); } } if (current_surface->GetSizeInBytes() <= candidate_size) { @@ -429,58 +431,40 @@ private: } staging_buffer.resize(surface->GetHostSizeInBytes()); surface->DownloadTexture(staging_buffer); - surface->FlushBuffer(staging_buffer); + surface->FlushBuffer(*memory_manager, staging_buffer); surface->MarkAsModified(false, Tick()); } - std::vector GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { - if (size == 0) { - return {}; - } - const IntervalType interval{cache_addr, cache_addr + size}; - - std::vector surfaces; - for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { - for (auto& s : pair.second) { - if (!s || !s->IsRegistered()) { - continue; - } - surfaces.push_back(s); - } - } - return surfaces; - } - void RegisterInnerCache(TSurface& surface) { - GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; - const 
GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - inner_cache[start].push_back(surface); + registry[start].push_back(surface); start++; } } void UnregisterInnerCache(TSurface& surface) { - GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; - const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - inner_cache[start].remove(surface); + registry[start].remove(surface); start++; } } - std::vector GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) { + std::vector GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { if (size == 0) { return {}; } - const GPUVAddr gpu_addr_end = gpu_addr + size; - GPUVAddr start = gpu_addr >> inner_cache_page_bits; - const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits; + const CacheAddr cache_addr_end = cache_addr + size; + CacheAddr start = cache_addr >> registry_page_bits; + const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; std::vector surfaces; while (start <= end) { - std::list& list = inner_cache[start]; + std::list& list = registry[start]; for (auto& s : list) { - if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) { + if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { s->MarkAsPicked(true); surfaces.push_back(s); } @@ -510,11 +494,6 @@ private: return {}; } - IntervalType GetInterval(const void* host_ptr, const std::size_t size) const { - const CacheAddr addr = ToCacheAddr(host_ptr); - return IntervalType::right_open(addr, addr + size); - } - struct RenderInfo { RenderTargetConfig config; TSurface target; @@ -531,11 +510,12 @@ private: u64 
ticks{}; - IntervalMap registered_surfaces; - - static constexpr u64 inner_cache_page_bits{20}; - static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits}; - std::unordered_map> inner_cache; + // The internal Cache is different for the Texture Cache. It's based on buckets + // of 1MB. This fits better for the purpose of this cache as textures are normaly + // large in size. + static constexpr u64 registry_page_bits{20}; + static constexpr u64 registry_page_size{1 << registry_page_bits}; + std::unordered_map> registry; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and -- cgit v1.2.3 From 1af4414861fda5cad2549372e65ecda090caf2f8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 19:09:34 -0400 Subject: Correct Mipmaps View method in Texture Cache --- .../renderer_opengl/gl_texture_cache.cpp | 44 +++++++++++----------- src/video_core/renderer_opengl/gl_texture_cache.h | 12 ++---- src/video_core/texture_cache/surface_base.h | 5 +-- 3 files changed, 29 insertions(+), 32 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 871608f6d..575608266 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -233,7 +233,8 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param main.num_layers = params.is_layered ? 
params.depth : 1; main.target = params.target; main_view = CreateView(main); - main_view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); + main_view->DecorateViewName(gpu_addr, + params.TargetName() + "V:" + std::to_string(view_count++)); } CachedSurface::~CachedSurface() { @@ -350,7 +351,7 @@ void CachedSurface::DecorateSurfaceName() { } void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { - LabelGLObject(GL_TEXTURE, texture_view.texture.handle, gpu_addr, prefix); + LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix); } View CachedSurface::CreateView(const ViewParams& view_key) { @@ -364,6 +365,7 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p : VideoCommon::ViewBase(params), surface{surface} { target = GetTextureTarget(params.target); texture_view = CreateTextureView(); + swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); } CachedSurfaceView::~CachedSurfaceView() = default; @@ -371,20 +373,24 @@ CachedSurfaceView::~CachedSurfaceView() = default; void CachedSurfaceView::Attach(GLenum attachment) const { ASSERT(params.num_layers == 1 && params.num_levels == 1); - switch (params.target) { + const auto& owner_params = surface.GetSurfaceParams(); + + switch (owner_params.target) { case SurfaceTarget::Texture1D: - glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, target, - surface.GetTexture(), params.base_level); + glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), + surface.GetTexture(), + params.base_level); break; case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, target, - surface.GetTexture(), params.base_level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), + surface.GetTexture(), + params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: case 
SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, target, + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTexture(), params.base_level, params.base_layer); break; default: @@ -394,22 +400,22 @@ void CachedSurfaceView::Attach(GLenum attachment) const { void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, SwizzleSource w_source) { - u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); - if (swizzle == texture_view.swizzle) + u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); + if (new_swizzle == swizzle) return; + swizzle = new_swizzle; const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; - glTextureParameteriv(texture_view.texture.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); - texture_view.swizzle = swizzle; + glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); } -CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView() const { +OGLTextureView CachedSurfaceView::CreateTextureView() const { const auto& owner_params = surface.GetSurfaceParams(); - TextureView texture_view; - texture_view.texture.Create(); + OGLTextureView tv; + tv.Create(); - const GLuint handle{texture_view.texture.handle}; + const GLuint handle{tv.handle}; const FormatTuple& tuple{ GetFormatTuple(owner_params.pixel_format, owner_params.component_type)}; @@ -418,11 +424,7 @@ CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView() const { ApplyTextureDefaults(owner_params, handle); - u32 swizzle = - EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); - texture_view.swizzle = swizzle; - - return texture_view; + return tv; } TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, diff --git 
a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1722c1bbc..083b5406b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -74,7 +74,7 @@ public: void Attach(GLenum attachment) const; GLuint GetTexture() { - return texture_view.texture.handle; + return texture_view.handle; } const SurfaceParams& GetSurfaceParams() const { @@ -104,11 +104,6 @@ public: void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); private: - struct TextureView { - OGLTextureView texture; - u32 swizzle; - }; - u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, Tegra::Texture::SwizzleSource z_source, @@ -117,12 +112,13 @@ private: (static_cast(z_source) << 8) | static_cast(w_source); } - TextureView CreateTextureView() const; + OGLTextureView CreateTextureView() const; CachedSurface& surface; GLenum target{}; - TextureView texture_view; + OGLTextureView texture_view; + u32 swizzle; }; class TextureCacheOpenGL final : public TextureCacheBase { diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 74be3237d..486585c9c 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -282,8 +282,7 @@ public: return {}; } const std::size_t size = view_params.GetGuestSizeInBytes(); - const GPUVAddr relative_address = view_addr - gpu_addr; - auto layer_mipmap = GetLayerMipmap(relative_address); + auto layer_mipmap = GetLayerMipmap(view_addr); if (!layer_mipmap) { return {}; } @@ -298,7 +297,7 @@ public: vp.num_layers = 1; vp.base_level = mipmap; vp.num_levels = 1; - vp.target = params.target; + vp.target = view_params.target; return {GetView(vp)}; } -- cgit v1.2.3 From 03d10ea3b420c923c14a11c86b47e2f00bc30e00 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 21:28:31 -0300 Subject: copy_params: Use 
constructor instead of C-like initialization --- src/video_core/texture_cache/copy_params.h | 10 ++++++ src/video_core/texture_cache/surface_base.h | 53 +++++++++++----------------- src/video_core/texture_cache/texture_cache.h | 23 +++++------- 3 files changed, 39 insertions(+), 47 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h index 75c2b1f05..8cf010142 100644 --- a/src/video_core/texture_cache/copy_params.h +++ b/src/video_core/texture_cache/copy_params.h @@ -9,6 +9,16 @@ namespace VideoCommon { struct CopyParams { + CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, u32 dest_z, + u32 source_level, u32 dest_level, u32 width, u32 height, u32 depth) + : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x}, + dest_y{dest_y}, dest_z{dest_z}, source_level{source_level}, + dest_level{dest_level}, width{width}, height{height}, depth{depth} {} + + CopyParams(u32 width, u32 height, u32 depth, u32 level) + : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level}, + dest_level{level}, width{width}, height{height}, depth{depth} {} + u32 source_x; u32 source_y; u32 source_z; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 486585c9c..029cfb055 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -149,45 +149,32 @@ public: } std::vector BreakDown(const SurfaceParams& in_params) const { - auto set_up_copy = [](CopyParams& cp, const u32 width, const u32 height, const u32 depth, - const u32 level) { - cp.source_x = 0; - cp.source_y = 0; - cp.source_z = 0; - cp.dest_x = 0; - cp.dest_y = 0; - cp.dest_z = 0; - cp.source_level = level; - cp.dest_level = level; - cp.width = width; - cp.height = height; - cp.depth = depth; - }; - const u32 layers = params.depth; - const u32 mipmaps = 
params.num_levels; + std::vector result; + const u32 layers{params.depth}; + const u32 mipmaps{params.num_levels}; + if (params.is_layered) { - std::vector result{layers * mipmaps}; - for (std::size_t layer = 0; layer < layers; layer++) { - const u32 layer_offset = layer * mipmaps; - for (std::size_t level = 0; level < mipmaps; level++) { - CopyParams& cp = result[layer_offset + level]; - const u32 width = - std::min(params.GetMipWidth(level), in_params.GetMipWidth(level)); - const u32 height = - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level)); - set_up_copy(cp, width, height, layer, level); + result.reserve(static_cast(layers) * static_cast(mipmaps)); + for (u32 layer = 0; layer < layers; layer++) { + const u32 layer_offset{layer * mipmaps}; + for (u32 level = 0; level < mipmaps; level++) { + const u32 width{ + std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; + const u32 height{ + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + result.emplace_back(width, height, layer, level); } } return result; + } else { - std::vector result{mipmaps}; + result.reserve(mipmaps); for (std::size_t level = 0; level < mipmaps; level++) { - CopyParams& cp = result[level]; - const u32 width = std::min(params.GetMipWidth(level), in_params.GetMipWidth(level)); - const u32 height = - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level)); - const u32 depth = std::min(params.GetMipDepth(level), in_params.GetMipDepth(level)); - set_up_copy(cp, width, height, depth, level); + const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; + const u32 height{ + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; + result.emplace_back(width, height, depth, level); } return result; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 
43aaec011..c9a648bbd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -283,7 +283,7 @@ private: } std::pair RebuildSurface(TSurface current_surface, - const SurfaceParams& params) { + const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); std::vector bricks = current_surface->BreakDown(params); @@ -323,26 +323,21 @@ private: return {}; } const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); - auto mipmap_layer = new_surface->GetLayerMipmap(surface->GetGpuAddr()); + auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { return {}; } - const u32 layer = (*mipmap_layer).first; - const u32 mipmap = (*mipmap_layer).second; + const u32 layer{mipmap_layer->first}; + const u32 mipmap{mipmap_layer->second}; if (new_surface->GetMipmapSize(mipmap) != candidate_size) { return {}; } // Now we got all the data set up - CopyParams copy_params{}; - const u32 dst_width = params.GetMipWidth(mipmap); - const u32 dst_height = params.GetMipHeight(mipmap); - copy_params.width = std::min(src_params.width, dst_width); - copy_params.height = std::min(src_params.height, dst_height); - copy_params.depth = 1; - copy_params.source_level = 0; - copy_params.dest_level = mipmap; - copy_params.source_z = 0; - copy_params.dest_z = layer; + const u32 dst_width{params.GetMipWidth(mipmap)}; + const u32 dst_height{params.GetMipHeight(mipmap)}; + const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, + std::min(src_params.width, dst_width), + std::min(src_params.height, dst_height), 1); ImageCopy(surface, new_surface, copy_params); } for (auto surface : overlaps) { -- cgit v1.2.3 From 2b30000a1ed1972e0701a8525182104b4544caa4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 21:48:02 -0300 Subject: surface_base: Silence truncation warnings and minor renames and reordering --- 
src/video_core/texture_cache/surface_base.cpp | 34 ++++++++++++++------------ src/video_core/texture_cache/surface_base.h | 35 +++++++++++++++------------ 2 files changed, 37 insertions(+), 32 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 0de0bc656..5e994cf08 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -18,17 +18,19 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; -SurfaceBaseImpl::SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& params) - : gpu_addr{gpu_vaddr}, params{params}, mipmap_sizes{params.num_levels}, - mipmap_offsets{params.num_levels}, layer_size{params.GetGuestLayerSize()}, - memory_size{params.GetGuestSizeInBytes()}, host_memory_size{params.GetHostSizeInBytes()} { - u32 offset = 0; - mipmap_offsets.resize(params.num_levels); - mipmap_sizes.resize(params.num_levels); - for (u32 i = 0; i < params.num_levels; i++) { - mipmap_offsets[i] = offset; - mipmap_sizes[i] = params.GetGuestMipmapSize(i); - offset += mipmap_sizes[i]; +SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) + : params{params}, gpu_addr{gpu_addr}, layer_size{params.GetGuestLayerSize()}, + guest_memory_size{params.GetGuestSizeInBytes()}, host_memory_size{ + params.GetHostSizeInBytes()} { + mipmap_offsets.reserve(params.num_levels); + mipmap_sizes.reserve(params.num_levels); + + std::size_t offset = 0; + for (u32 level = 0; level < params.num_levels; ++level) { + const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; + mipmap_sizes.push_back(mipmap_size); + mipmap_offsets.push_back(offset); + offset += mipmap_size; } } @@ -44,7 +46,7 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf std::size_t 
host_offset{0}; const std::size_t guest_stride = layer_size; const std::size_t host_stride = params.GetHostLayerSize(level); - for (u32 layer = 0; layer < params.depth; layer++) { + for (u32 layer = 0; layer < params.depth; ++layer) { MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1, params.tile_width_spacing, buffer + host_offset, memory + guest_offset); guest_offset += guest_stride; @@ -60,12 +62,12 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer) { MICROPROFILE_SCOPE(GPU_Load_Texture); - auto host_ptr = memory_manager.GetPointer(gpu_addr); + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); for (u32 level = 0; level < params.num_levels; ++level) { - const u32 host_offset = params.GetHostMipmapLevelOffset(level); + const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, staging_buffer.data() + host_offset, level); } @@ -91,7 +93,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } for (u32 level = 0; level < params.num_levels; ++level) { - const u32 host_offset = params.GetHostMipmapLevelOffset(level); + const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; ConvertFromGuestToHost(staging_buffer.data() + host_offset, params.pixel_format, params.GetMipWidth(level), params.GetMipHeight(level), params.GetMipDepth(level), true, true); @@ -105,7 +107,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { - const u32 host_offset 
= params.GetHostMipmapLevelOffset(level); + const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, staging_buffer.data() + host_offset, level); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 029cfb055..7cc122158 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -78,7 +78,7 @@ public: void SetCacheAddr(const CacheAddr new_addr) { cache_addr = new_addr; - cache_addr_end = new_addr + memory_size; + cache_addr_end = new_addr + guest_memory_size; } const SurfaceParams& GetSurfaceParams() const { @@ -86,7 +86,7 @@ public: } std::size_t GetSizeInBytes() const { - return memory_size; + return guest_memory_size; } std::size_t GetHostSizeInBytes() const { @@ -135,17 +135,19 @@ public: } std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const { - if (candidate_gpu_addr < gpu_addr) + if (candidate_gpu_addr < gpu_addr) { return {}; - const GPUVAddr relative_address = candidate_gpu_addr - gpu_addr; - const u32 layer = relative_address / layer_size; + } + const auto relative_address{static_cast(candidate_gpu_addr - gpu_addr)}; + const auto layer{static_cast(relative_address / layer_size)}; const GPUVAddr mipmap_address = relative_address - layer_size * layer; const auto mipmap_it = binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); - if (mipmap_it != mipmap_offsets.end()) { - return {{layer, std::distance(mipmap_offsets.begin(), mipmap_it)}}; + if (mipmap_it == mipmap_offsets.end()) { + return {}; } - return {}; + const auto level{static_cast(std::distance(mipmap_offsets.begin(), mipmap_it))}; + return std::make_pair(layer, level); } std::vector BreakDown(const SurfaceParams& in_params) const { @@ -169,7 +171,7 @@ public: } else { result.reserve(mipmaps); - for (std::size_t level = 0; level < mipmaps; level++) { + for (u32 level = 0; level 
< mipmaps; level++) { const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; const u32 height{ std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; @@ -181,21 +183,22 @@ public: } protected: - explicit SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& params); + explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params); ~SurfaceBaseImpl() = default; virtual void DecorateSurfaceName() = 0; const SurfaceParams params; - GPUVAddr gpu_addr{}; - std::vector mipmap_sizes; - std::vector mipmap_offsets; const std::size_t layer_size; - const std::size_t memory_size; + const std::size_t guest_memory_size; const std::size_t host_memory_size; - CacheAddr cache_addr; + GPUVAddr gpu_addr{}; + CacheAddr cache_addr{}; CacheAddr cache_addr_end{}; - VAddr cpu_addr; + VAddr cpu_addr{}; + + std::vector mipmap_sizes; + std::vector mipmap_offsets; private: void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, -- cgit v1.2.3 From 16e8625a301b1f43ecebe459a40bf33f89322032 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 21:55:55 -0300 Subject: surface_base: Split BreakDown into layered and non-layered variants --- src/video_core/texture_cache/surface_base.h | 93 +++++++++++++++-------------- 1 file changed, 48 insertions(+), 45 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 7cc122158..0cfb835d9 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -106,32 +106,32 @@ public: } bool MatchesTopology(const SurfaceParams& rhs) const { - const u32 src_bpp = params.GetBytesPerPixel(); - const u32 dst_bpp = rhs.GetBytesPerPixel(); + const u32 src_bpp{params.GetBytesPerPixel()}; + const u32 dst_bpp{rhs.GetBytesPerPixel()}; return std::tie(src_bpp, params.is_tiled) == std::tie(dst_bpp, 
rhs.is_tiled); } MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { - if (params.is_tiled) { - if (std::tie(params.height, params.depth, params.block_width, params.block_height, - params.block_depth, params.tile_width_spacing) == - std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing)) { - if (params.width == rhs.width) { - return MatchStructureResult::FullMatch; - } - if (params.GetBlockAlignedWidth() == rhs.GetBlockAlignedWidth()) { - return MatchStructureResult::SemiMatch; - } - } - return MatchStructureResult::None; - } else { + if (!params.is_tiled) { if (std::tie(params.width, params.height, params.pitch) == std::tie(rhs.width, rhs.height, rhs.pitch)) { return MatchStructureResult::FullMatch; } return MatchStructureResult::None; } + // Tiled surface + if (std::tie(params.height, params.depth, params.block_width, params.block_height, + params.block_depth, params.tile_width_spacing) == + std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, + rhs.tile_width_spacing)) { + if (params.width == rhs.width) { + return MatchStructureResult::FullMatch; + } + if (params.GetBlockAlignedWidth() == rhs.GetBlockAlignedWidth()) { + return MatchStructureResult::SemiMatch; + } + } + return MatchStructureResult::None; } std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const { @@ -151,35 +151,7 @@ public: } std::vector BreakDown(const SurfaceParams& in_params) const { - std::vector result; - const u32 layers{params.depth}; - const u32 mipmaps{params.num_levels}; - - if (params.is_layered) { - result.reserve(static_cast(layers) * static_cast(mipmaps)); - for (u32 layer = 0; layer < layers; layer++) { - const u32 layer_offset{layer * mipmaps}; - for (u32 level = 0; level < mipmaps; level++) { - const u32 width{ - std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; - const u32 height{ - std::min(params.GetMipHeight(level), 
in_params.GetMipHeight(level))}; - result.emplace_back(width, height, layer, level); - } - } - return result; - - } else { - result.reserve(mipmaps); - for (u32 level = 0; level < mipmaps; level++) { - const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; - const u32 height{ - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; - const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; - result.emplace_back(width, height, depth, level); - } - return result; - } + return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params); } protected: @@ -203,6 +175,37 @@ protected: private: void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, u32 level); + + std::vector BreakDownLayered(const SurfaceParams& in_params) const { + const u32 layers{params.depth}; + const u32 mipmaps{params.num_levels}; + std::vector result; + result.reserve(static_cast(layers) * static_cast(mipmaps)); + + for (u32 layer = 0; layer < layers; layer++) { + for (u32 level = 0; level < mipmaps; level++) { + const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; + const u32 height{ + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + result.emplace_back(width, height, layer, level); + } + } + return result; + } + + std::vector BreakDownNonLayered(const SurfaceParams& in_params) const { + const u32 mipmaps{params.num_levels}; + std::vector result; + result.reserve(mipmaps); + + for (u32 level = 0; level < mipmaps; level++) { + const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; + const u32 height{std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; + result.emplace_back(width, height, depth, level); + } + return result; + } }; template -- cgit v1.2.3 From 
549fd18ac44c6bcefdf6584484d775f0129e3fe3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 22:03:33 -0300 Subject: surface_view: Add constructor for ViewParams --- .../renderer_opengl/gl_texture_cache.cpp | 15 +++------ src/video_core/texture_cache/surface_base.h | 39 +++++++--------------- src/video_core/texture_cache/surface_view.h | 8 ++++- 3 files changed, 23 insertions(+), 39 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 575608266..c6990ad21 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -226,13 +226,8 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param target = GetTextureTarget(params.target); texture = CreateTexture(params, target, internal_format); DecorateSurfaceName(); - ViewParams main{}; - main.num_levels = params.num_levels; - main.base_level = 0; - main.base_layer = 0; - main.num_layers = params.is_layered ? params.depth : 1; - main.target = params.target; - main_view = CreateView(main); + main_view = CreateView( + ViewParams(params.target, 0, params.is_layered ? 
params.depth : 1, 0, params.num_levels)); main_view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); } @@ -378,13 +373,11 @@ void CachedSurfaceView::Attach(GLenum attachment) const { switch (owner_params.target) { case SurfaceTarget::Texture1D: glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), - params.base_level); + surface.GetTexture(), params.base_level); break; case SurfaceTarget::Texture2D: glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), - params.base_level); + surface.GetTexture(), params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 0cfb835d9..f469ab498 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -253,45 +253,30 @@ public: } TView EmplaceOverview(const SurfaceParams& overview_params) { - ViewParams vp{}; - vp.base_level = 0; - vp.num_levels = params.num_levels; - vp.target = overview_params.target; - if (params.is_layered && !overview_params.is_layered) { - vp.base_layer = 0; - vp.num_layers = 1; - } else { - vp.base_layer = 0; - vp.num_layers = params.depth; - } - return GetView(vp); + const u32 num_layers{params.is_layered && !overview_params.is_layered ? 
1 : params.depth}; + const ViewParams view_params(overview_params.target, 0, num_layers, 0, params.num_levels); + return GetView(view_params); } std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { - if (view_addr < gpu_addr) - return {}; - if (params.target == SurfaceTarget::Texture3D || + if (view_addr < gpu_addr || params.target == SurfaceTarget::Texture3D || view_params.target == SurfaceTarget::Texture3D) { return {}; } - const std::size_t size = view_params.GetGuestSizeInBytes(); - auto layer_mipmap = GetLayerMipmap(view_addr); + const std::size_t size{view_params.GetGuestSizeInBytes()}; + const auto layer_mipmap{GetLayerMipmap(view_addr)}; if (!layer_mipmap) { return {}; } - const u32 layer = (*layer_mipmap).first; - const u32 mipmap = (*layer_mipmap).second; + const u32 layer{layer_mipmap->first}; + const u32 mipmap{layer_mipmap->second}; if (GetMipmapSize(mipmap) != size) { - // TODO: the view may cover many mimaps, this case can still go on + // TODO: The view may cover many mimaps, this case can still go on. + // This edge-case can be safely be ignored since it will just result in worse + // performance. 
return {}; } - ViewParams vp{}; - vp.base_layer = layer; - vp.num_layers = 1; - vp.base_level = mipmap; - vp.num_levels = 1; - vp.target = view_params.target; - return {GetView(vp)}; + return GetView(ViewParams(params.target, layer, 1, mipmap, 1)); } TView GetMainView() const { diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index c122800a6..1ef4509ce 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -13,15 +13,21 @@ namespace VideoCommon { struct ViewParams { + ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, u32 num_layers, + u32 base_level, u32 num_levels) + : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level}, + num_levels{num_levels} {} + std::size_t Hash() const; bool operator==(const ViewParams& rhs) const; + VideoCore::Surface::SurfaceTarget target{}; u32 base_layer{}; u32 num_layers{}; u32 base_level{}; u32 num_levels{}; - VideoCore::Surface::SurfaceTarget target; + bool IsLayered() const { switch (target) { case VideoCore::Surface::SurfaceTarget::Texture1DArray: -- cgit v1.2.3 From 324e470879e63423844a687f7d675a0536006f07 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 23:13:05 -0400 Subject: Texture Cache: Implement Blitting and Fermi Copies --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 +- src/video_core/renderer_opengl/gl_rasterizer.h | 1 - .../renderer_opengl/gl_texture_cache.cpp | 70 ++++++++++++++++++- src/video_core/renderer_opengl/gl_texture_cache.h | 8 +++ src/video_core/renderer_opengl/utils.cpp | 78 ---------------------- src/video_core/renderer_opengl/utils.h | 13 ---- src/video_core/texture_cache/texture_cache.h | 19 ++++-- 7 files changed, 93 insertions(+), 100 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 
77ac963b4..d0e7b61e7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -747,9 +747,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) { MICROPROFILE_SCOPE(OpenGL_Blits); - const auto src_surface{texture_cache.GetFermiSurface(src)}; - const auto dst_surface{texture_cache.GetFermiSurface(dst)}; - // blitter.Blit(src_surface, dst_surface, src_rect, dst_rect); + texture_cache.DoFermiCopy(src, dst, src_rect, dst_rect); return true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 5c37d3bfa..d872e5110 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -208,7 +208,6 @@ private: static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; - SurfaceBlitter blitter; BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index c6990ad21..a58e3a816 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,6 +9,7 @@ #include "core/core.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" #include "video_core/texture_cache/texture_cache.h" @@ -23,6 +24,7 @@ using VideoCore::MortonSwizzleMode; using VideoCore::Surface::ComponentType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; +using VideoCore::Surface::SurfaceType; 
MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); @@ -422,7 +424,10 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : TextureCacheBase{system, rasterizer} {} + : TextureCacheBase{system, rasterizer} { + src_framebuffer.Create(); + dst_framebuffer.Create(); +} TextureCacheOpenGL::~TextureCacheOpenGL() = default; @@ -443,4 +448,67 @@ void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, copy_params.depth); } +void TextureCacheOpenGL::ImageBlit(Surface src_surface, Surface dst_surface, + const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) { + const auto& src_params{src_surface->GetSurfaceParams()}; + const auto& dst_params{dst_surface->GetSurfaceParams()}; + + OpenGLState prev_state{OpenGLState::GetCurState()}; + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.draw.read_framebuffer = src_framebuffer.handle; + state.draw.draw_framebuffer = dst_framebuffer.handle; + state.ApplyFramebufferState(); + + u32 buffers{}; + + UNIMPLEMENTED_IF(src_params.target != SurfaceTarget::Texture2D); + UNIMPLEMENTED_IF(dst_params.target != SurfaceTarget::Texture2D); + + const GLuint src_texture{src_surface->GetTexture()}; + const GLuint dst_texture{dst_surface->GetTexture()}; + + if (src_params.type == SurfaceType::ColorTexture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + src_texture, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + dst_texture, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + buffers = GL_COLOR_BUFFER_BIT; + 
} else if (src_params.type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_texture, + 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_texture, + 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + buffers = GL_DEPTH_BUFFER_BIT; + } else if (src_params.type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + src_texture, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + dst_texture, 0); + + buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } + + glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, + dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, + buffers == GL_COLOR_BUFFER_BIT ? 
GL_LINEAR : GL_NEAREST); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 083b5406b..1ad01137b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -128,8 +128,16 @@ public: protected: Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; + void ImageCopy(Surface src_surface, Surface dst_surface, const VideoCommon::CopyParams& copy_params) override; + + void ImageBlit(Surface src_surface, Surface dst_surface, const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) override; + +private: + OGLFramebuffer src_framebuffer; + OGLFramebuffer dst_framebuffer; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index a9fa539a5..68c36988d 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -9,19 +9,10 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/scope_exit.h" -#include "video_core/renderer_opengl/gl_state.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" -#include "video_core/surface.h" namespace OpenGL { -using Tegra::Shader::TextureType; -using Tegra::Texture::SwizzleSource; - -using VideoCore::Surface::SurfaceTarget; -using VideoCore::Surface::SurfaceType; - BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; @@ -49,75 +40,6 @@ void BindBuffersRangePushBuffer::Bind() const { sizes.data()); } -SurfaceBlitter::SurfaceBlitter() { - src_framebuffer.Create(); - dst_framebuffer.Create(); -} - -SurfaceBlitter::~SurfaceBlitter() = default; - -void SurfaceBlitter::Blit(View src, View dst, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) 
const { - const auto& src_params{src->GetSurfaceParams()}; - const auto& dst_params{dst->GetSurfaceParams()}; - - OpenGLState prev_state{OpenGLState::GetCurState()}; - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.draw.read_framebuffer = src_framebuffer.handle; - state.draw.draw_framebuffer = dst_framebuffer.handle; - state.ApplyFramebufferState(); - - u32 buffers{}; - - UNIMPLEMENTED_IF(src_params.target != SurfaceTarget::Texture2D); - UNIMPLEMENTED_IF(dst_params.target != SurfaceTarget::Texture2D); - - const GLuint src_texture{src->GetTexture()}; - const GLuint dst_texture{dst->GetTexture()}; - - if (src_params.type == SurfaceType::ColorTexture) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - src_texture, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - dst_texture, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_texture, - 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_texture, - 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 
GL_TEXTURE_2D, - src_texture, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - dst_texture, 0); - - buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; - } - - glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, - dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, - buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); -} - void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { if (!GLAD_GL_KHR_debug) { // We don't need to throw an error as this is just for debugging diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 8977d2383..77e8d53ba 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -34,19 +34,6 @@ private: std::vector sizes; }; -class SurfaceBlitter { -public: - explicit SurfaceBlitter(); - ~SurfaceBlitter(); - - void Blit(View src, View dst, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) const; - -private: - OGLFramebuffer src_framebuffer; - OGLFramebuffer dst_framebuffer; -}; - void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); } // namespace OpenGL diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c9a648bbd..bb5a50ab9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -15,6 +15,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/math_util.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ -142,10 +143,11 @@ public: } } - TView GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - SurfaceParams params = 
SurfaceParams::CreateForFermiCopySurface(config); - const GPUVAddr gpu_addr = config.Address(); - return GetSurface(gpu_addr, params, true).second; + void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, + const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, + const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) { + ImageBlit(GetFermiSurface(src_config), GetFermiSurface(dst_config), src_rect, dst_rect); } TSurface TryFindFramebufferSurface(const u8* host_ptr) { @@ -183,6 +185,9 @@ protected: virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, const CopyParams& copy_params) = 0; + virtual void ImageBlit(TSurface src, TSurface dst, const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) = 0; + void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); @@ -223,6 +228,12 @@ protected: return new_surface; } + TSurface GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); + const GPUVAddr gpu_addr = config.Address(); + return GetSurface(gpu_addr, params, true).first; + } + Core::System& system; private: -- cgit v1.2.3 From e0002599accc783be1bda5853df377c84ee6219a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 8 May 2019 03:51:54 -0300 Subject: surface_base: Add parenthesis to EmplaceOverview's predicate --- src/video_core/texture_cache/surface_base.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index f469ab498..c11998249 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -253,9 +253,8 @@ public: } TView EmplaceOverview(const SurfaceParams& overview_params) { - const u32 
num_layers{params.is_layered && !overview_params.is_layered ? 1 : params.depth}; - const ViewParams view_params(overview_params.target, 0, num_layers, 0, params.num_levels); - return GetView(view_params); + const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth}; + return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { -- cgit v1.2.3 From de0b1cb2b2199bd8efff78938d385fa74652cdfb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 07:09:02 -0400 Subject: Fixes to mipmap's process and reconstruct process --- src/video_core/texture_cache/surface_base.h | 4 ++-- src/video_core/texture_cache/texture_cache.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index c11998249..017ee999e 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -262,20 +262,20 @@ public: view_params.target == SurfaceTarget::Texture3D) { return {}; } - const std::size_t size{view_params.GetGuestSizeInBytes()}; const auto layer_mipmap{GetLayerMipmap(view_addr)}; if (!layer_mipmap) { return {}; } const u32 layer{layer_mipmap->first}; const u32 mipmap{layer_mipmap->second}; + const std::size_t size{view_params.GetGuestSizeInBytes()}; if (GetMipmapSize(mipmap) != size) { // TODO: The view may cover many mimaps, this case can still go on. // This edge-case can be safely be ignored since it will just result in worse // performance. 
return {}; } - return GetView(ViewParams(params.target, layer, 1, mipmap, 1)); + return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1)); } TView GetMainView() const { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index bb5a50ab9..554b9a228 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -323,7 +323,7 @@ private: const SurfaceParams& params, const GPUVAddr gpu_addr, const u8* host_ptr) { - if (!params.is_layered || params.target == SurfaceTarget::Texture3D) { + if (params.target == SurfaceTarget::Texture3D) { return {}; } TSurface new_surface = GetUncachedSurface(gpu_addr, params); -- cgit v1.2.3 From ba677ccb5a8ae0c889751fcdd40b0c9e818ad992 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 10:32:30 -0400 Subject: texture_cache: Implement guest flushing --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 ++++-- src/video_core/texture_cache/surface_base.cpp | 19 +++++++++++-------- src/video_core/texture_cache/texture_cache.h | 14 ++++++++++++++ 3 files changed, 29 insertions(+), 10 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d0e7b61e7..63ee83391 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -722,7 +722,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { if (!addr || !size) { return; } - // texture_cache.FlushRegion(addr, size); + texture_cache.FlushRegion(addr, size); global_cache.FlushRegion(addr, size); } @@ -738,7 +738,9 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { } void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { - FlushRegion(addr, size); + if (Settings::values.use_accurate_gpu_emulation) { + FlushRegion(addr, size); + } 
InvalidateRegion(addr, size); } diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 5e994cf08..dc5013240 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -63,6 +63,9 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer) { MICROPROFILE_SCOPE(GPU_Load_Texture); const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + if (!host_ptr) { + return; + } if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); @@ -103,7 +106,10 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer) { MICROPROFILE_SCOPE(GPU_Flush_Texture); - auto host_ptr = memory_manager.GetPointer(gpu_addr); + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + if (!host_ptr) { + return; + } if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { @@ -112,25 +118,22 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, staging_buffer.data() + host_offset, level); } } else { - UNIMPLEMENTED(); - /* ASSERT(params.target == SurfaceTarget::Texture2D); ASSERT(params.num_levels == 1); - const u32 bpp{params.GetFormatBpp() / 8}; + const u32 bpp{params.GetBytesPerPixel()}; const u32 copy_size{params.width * bpp}; if (params.pitch == copy_size) { - std::memcpy(host_ptr, staging_buffer.data(), memory_size); + std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); } else { u8* start{host_ptr}; const u8* read_to{staging_buffer.data()}; - for (u32 h = params.GetHeight(); h > 0; --h) { + for (u32 h = params.height; h > 0; --h) { std::memcpy(start, read_to, copy_size); - start 
+= params.GetPitch(); + start += params.pitch; read_to += copy_size; } } - */ } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 554b9a228..422bf3e58 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -61,6 +61,20 @@ public: } } + void FlushRegion(CacheAddr addr, std::size_t size) { + auto surfaces = GetSurfacesInRegion(addr, size); + if (surfaces.empty()) { + return; + } + std::sort(surfaces.begin(), surfaces.end(), + [](const TSurface& a, const TSurface& b) -> bool { + return a->GetModificationTick() < b->GetModificationTick(); + }); + for (const auto& surface : surfaces) { + FlushSurface(surface); + } + } + TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { const auto gpu_addr{config.tic.Address()}; -- cgit v1.2.3 From 4e2071b6d9b414fa0152deb5e9d55674d636afe4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 17:45:59 -0400 Subject: texture_cache: Correct premature texceptions Due to our current infrastructure, it is possible for a mipmap to be set as a render target before a texception of that mipmap's superset is set up afterwards. This is problematic as we rely on texture views to set up texceptions and protecting render targets for 3D texture rendering. One simple solution is to configure framebuffers after texture setup, but this brings other problems. This solution forces a reconfiguration of the framebuffers after such an event happens.
--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 15 +++++++++----- src/video_core/renderer_opengl/gl_rasterizer.h | 7 +++++-- src/video_core/texture_cache/surface_base.h | 17 +++++++++++++--- src/video_core/texture_cache/texture_cache.h | 26 ++++++++++++++++++++---- 4 files changed, 51 insertions(+), 14 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 63ee83391..3baf1522d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -461,15 +461,15 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, } std::pair RasterizerOpenGL::ConfigureFramebuffers( - OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, - std::optional single_color_target) { + OpenGLState& current_state, bool must_reconfigure, bool using_color_fb, bool using_depth_fb, + bool preserve_contents, std::optional single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, single_color_target}; - if (fb_config_state == current_framebuffer_config_state && + if (!must_reconfigure && fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or // single color targets). 
This is done because the guest registers may not change but the @@ -622,8 +622,9 @@ void RasterizerOpenGL::Clear() { return; } - const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( - clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); + const auto [clear_depth, clear_stencil] = + ConfigureFramebuffers(clear_state, false, use_color, use_depth || use_stencil, false, + regs.clear_buffers.RT.Value()); if (regs.clear_flags.scissor) { SyncScissorTest(clear_state); } @@ -705,6 +706,10 @@ void RasterizerOpenGL::DrawArrays() { DrawParameters params = SetupDraw(); SetupShaders(params.primitive_mode); + if (texture_cache.ConsumeReconfigurationFlag()) { + ConfigureFramebuffers(state, true); + } + buffer_cache.Unmap(); shader_program_manager->ApplyTo(state); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d872e5110..970637efa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -101,6 +101,8 @@ private: /** * Configures the color and depth framebuffer states. + * @param must_reconfigure If true, tells the framebuffer to skip the cache and reconfigure + * again. Used by the texture cache to solve texception conflicts * @param use_color_fb If true, configure color framebuffers. * @param using_depth_fb If true, configure the depth/stencil framebuffer. * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. 
@@ -109,8 +111,9 @@ private: * (requires using_depth_fb to be true) */ std::pair ConfigureFramebuffers( - OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true, - bool preserve_contents = true, std::optional single_color_target = {}); + OpenGLState& current_state, bool must_reconfigure = false, bool use_color_fb = true, + bool using_depth_fb = true, bool preserve_contents = true, + std::optional single_color_target = {}); /// Configures the current constbuffers to use for the draw command. void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 017ee999e..179e80ddb 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -55,6 +55,11 @@ public: return (cache_addr < end) && (cache_addr_end > start); } + bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { + const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; + return (gpu_addr <= other_start && other_end <= gpu_addr_end); + } + // Use only when recycling a surface void SetGpuAddr(const GPUVAddr new_addr) { gpu_addr = new_addr; @@ -105,6 +110,12 @@ public: return params.target == target; } + bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { + return std::tie(gpu_addr, params.target, params.num_levels) == + std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && + params.target == SurfaceTarget::Texture2D && params.num_levels == 1; + } + bool MatchesTopology(const SurfaceParams& rhs) const { const u32 src_bpp{params.GetBytesPerPixel()}; const u32 dst_bpp{rhs.GetBytesPerPixel()}; @@ -121,9 +132,9 @@ public: } // Tiled surface if (std::tie(params.height, params.depth, params.block_width, params.block_height, - params.block_depth, params.tile_width_spacing) == + params.block_depth, params.tile_width_spacing, params.num_levels) == std::tie(rhs.height, 
rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing)) { + rhs.tile_width_spacing, rhs.num_levels)) { if (params.width == rhs.width) { return MatchStructureResult::FullMatch; } @@ -259,7 +270,7 @@ public: std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { if (view_addr < gpu_addr || params.target == SurfaceTarget::Texture3D || - view_params.target == SurfaceTarget::Texture3D) { + params.num_levels == 1 || view_params.target == SurfaceTarget::Texture3D) { return {}; } const auto layer_mipmap{GetLayerMipmap(view_addr)}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 422bf3e58..96d108147 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -120,6 +120,10 @@ public: return {}; } + if (regs.color_mask[index].raw == 0) { + return {}; + } + auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) @@ -183,6 +187,12 @@ public: return ++ticks; } + bool ConsumeReconfigurationFlag() { + const bool result = force_reconfiguration; + force_reconfiguration = false; + return result; + } + protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { @@ -219,9 +229,10 @@ protected: rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } - void Unregister(TSurface surface) { - if (surface->IsProtected()) + void Unregister(TSurface surface, const bool force_unregister = false) { + if (surface->IsProtected() && !force_unregister) { return; + } const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = surface->GetCacheAddr(); const std::size_t size = surface->GetSizeInBytes(); @@ -365,8 +376,10 @@ private: std::min(src_params.height, dst_height), 1); ImageCopy(surface, new_surface, copy_params); } + 
force_reconfiguration = false; for (auto surface : overlaps) { - Unregister(surface); + force_reconfiguration |= surface->IsProtected(); + Unregister(surface, true); } Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; @@ -379,6 +392,7 @@ private: const auto cache_addr{ToCacheAddr(host_ptr)}; const std::size_t candidate_size = params.GetGuestSizeInBytes(); auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -403,7 +417,7 @@ private: return RebuildSurface(current_surface, params); } } - if (current_surface->GetSizeInBytes() <= candidate_size) { + if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); } @@ -530,6 +544,10 @@ private: u64 ticks{}; + // Sometimes Setup Textures can hit a surface that's on the render target, when this happens + // we force a reconfiguration of the frame buffer after setup. + bool force_reconfiguration; + // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly // large in size. -- cgit v1.2.3 From b347543e8341ae323ea232d47df2c144fe21c739 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 18:27:29 -0400 Subject: Reduce amount of size calculations. 
--- src/common/common_funcs.h | 11 +++++ .../renderer_opengl/gl_texture_cache.cpp | 1 - src/video_core/renderer_opengl/gl_texture_cache.h | 2 +- src/video_core/texture_cache/surface_base.cpp | 22 +++++++--- src/video_core/texture_cache/surface_base.h | 28 +++++------- src/video_core/texture_cache/surface_params.cpp | 31 +------------- src/video_core/texture_cache/surface_params.h | 50 +++++++++++++++++----- src/video_core/texture_cache/texture_cache.h | 40 ++++++++--------- 8 files changed, 97 insertions(+), 88 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 8b0d34da6..00a5698f3 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -4,6 +4,7 @@ #pragma once +#include #include #if !defined(ARCHITECTURE_x86_64) @@ -60,4 +61,14 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) { return a | b << 8 | c << 16 | d << 24; } +template > +ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { + // Note: BOTH type T and the type after ForwardIt is dereferenced + // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. + // This is stricter than lower_bound requirement (see above) + + first = std::lower_bound(first, last, value, comp); + return first != last && !comp(value, *first) ? 
first : last; +} + } // namespace Common diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a58e3a816..32cb08963 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -240,7 +240,6 @@ CachedSurface::~CachedSurface() { } void CachedSurface::DownloadTexture(std::vector& staging_buffer) { - LOG_CRITICAL(Render_OpenGL, "Flushing"); MICROPROFILE_SCOPE(OpenGL_Texture_Download); // TODO(Rodrigo): Optimize alignment diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1ad01137b..0a1b57014 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -133,7 +133,7 @@ protected: const VideoCommon::CopyParams& copy_params) override; void ImageBlit(Surface src_surface, Surface dst_surface, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) override; + const Common::Rectangle& dst_rect) override; private: OGLFramebuffer src_framebuffer; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index dc5013240..36ca72b4a 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -19,19 +19,27 @@ using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) - : params{params}, gpu_addr{gpu_addr}, layer_size{params.GetGuestLayerSize()}, - guest_memory_size{params.GetGuestSizeInBytes()}, host_memory_size{ - params.GetHostSizeInBytes()} { - mipmap_offsets.reserve(params.num_levels); - mipmap_sizes.reserve(params.num_levels); + : params{params}, mipmap_sizes(params.num_levels), + mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ + params.GetHostSizeInBytes()} { 
std::size_t offset = 0; for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; - mipmap_sizes.push_back(mipmap_size); - mipmap_offsets.push_back(offset); + mipmap_sizes[level] = mipmap_size; + mipmap_offsets[level] = offset; offset += mipmap_size; } + layer_size = offset; + if (params.is_layered) { + if (params.is_tiled) { + layer_size = + SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); + } + guest_memory_size = layer_size * params.depth; + } else { + guest_memory_size = layer_size; + } } void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 179e80ddb..095deb602 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -9,6 +9,7 @@ #include #include "common/assert.h" +#include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/morton.h" @@ -16,16 +17,6 @@ #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" -template > -ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { - // Note: BOTH type T and the type after ForwardIt is dereferenced - // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. - // This is stricter than lower_bound requirement (see above) - - first = std::lower_bound(first, last, value, comp); - return first != last && !comp(value, *first) ? 
first : last; -} - namespace Tegra { class MemoryManager; } @@ -153,7 +144,7 @@ public: const auto layer{static_cast(relative_address / layer_size)}; const GPUVAddr mipmap_address = relative_address - layer_size * layer; const auto mipmap_it = - binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); if (mipmap_it == mipmap_offsets.end()) { return {}; } @@ -172,8 +163,8 @@ protected: virtual void DecorateSurfaceName() = 0; const SurfaceParams params; - const std::size_t layer_size; - const std::size_t guest_memory_size; + std::size_t layer_size; + std::size_t guest_memory_size; const std::size_t host_memory_size; GPUVAddr gpu_addr{}; CacheAddr cache_addr{}; @@ -268,9 +259,11 @@ public: return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } - std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { - if (view_addr < gpu_addr || params.target == SurfaceTarget::Texture3D || - params.num_levels == 1 || view_params.target == SurfaceTarget::Texture3D) { + std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, + const std::size_t candidate_size) { + if (params.target == SurfaceTarget::Texture3D || + (params.num_levels == 1 && !params.is_layered) || + view_params.target == SurfaceTarget::Texture3D) { return {}; } const auto layer_mipmap{GetLayerMipmap(view_addr)}; @@ -279,8 +272,7 @@ public: } const u32 layer{layer_mipmap->first}; const u32 mipmap{layer_mipmap->second}; - const std::size_t size{view_params.GetGuestSizeInBytes()}; - if (GetMipmapSize(mipmap) != size) { + if (GetMipmapSize(mipmap) != candidate_size) { // TODO: The view may cover many mimaps, this case can still go on. // This edge-case can be safely be ignored since it will just result in worse // performance. 
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index d9052152c..b537b26e2 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -4,13 +4,12 @@ #include -#include "common/cityhash.h" #include "common/alignment.h" +#include "common/cityhash.h" #include "core/core.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/surface.h" #include "video_core/texture_cache/surface_params.h" -#include "video_core/textures/decoders.h" namespace VideoCommon { @@ -169,18 +168,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( return params; } -u32 SurfaceParams::GetMipWidth(u32 level) const { - return std::max(1U, width >> level); -} - -u32 SurfaceParams::GetMipHeight(u32 level) const { - return std::max(1U, height >> level); -} - -u32 SurfaceParams::GetMipDepth(u32 level) const { - return is_layered ? depth : std::max(1U, depth >> level); -} - bool SurfaceParams::IsLayered() const { switch (target) { case SurfaceTarget::Texture1DArray: @@ -275,22 +262,6 @@ std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { return GetInnerMipmapMemorySize(level, true, false); } -u32 SurfaceParams::GetDefaultBlockWidth() const { - return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); -} - -u32 SurfaceParams::GetDefaultBlockHeight() const { - return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); -} - -u32 SurfaceParams::GetBitsPerPixel() const { - return VideoCore::Surface::GetFormatBpp(pixel_format); -} - -u32 SurfaceParams::GetBytesPerPixel() const { - return VideoCore::Surface::GetBytesPerPixel(pixel_format); -} - bool SurfaceParams::IsPixelFormatZeta() const { return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 
ec8efa210..e0ec1be0e 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -10,8 +10,9 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/surface.h" #include "video_core/shader/shader_ir.h" +#include "video_core/surface.h" +#include "video_core/textures/decoders.h" namespace VideoCommon { @@ -50,10 +51,17 @@ public: std::size_t GetHostSizeInBytes() const { std::size_t host_size_in_bytes; if (IsPixelFormatASTC(pixel_format)) { + constexpr std::size_t rgb8_bpp = 4ULL; // ASTC is uncompressed in software, in emulated as RGBA8 - host_size_in_bytes = static_cast(Common::AlignUp(width, GetDefaultBlockWidth())) * - static_cast(Common::AlignUp(height, GetDefaultBlockHeight())) * - static_cast(depth) * 4ULL; + host_size_in_bytes = 0; + for (std::size_t level = 0; level < num_levels; level++) { + const std::size_t width = + Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); + const std::size_t height = + Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); + const std::size_t depth = is_layered ? depth : GetMipDepth(level); + host_size_in_bytes += width * height * depth * rgb8_bpp; + } } else { host_size_in_bytes = GetInnerMemorySize(true, false, false); } @@ -65,13 +73,19 @@ public: } /// Returns the width of a given mipmap level. - u32 GetMipWidth(u32 level) const; + u32 GetMipWidth(u32 level) const { + return std::max(1U, width >> level); + } /// Returns the height of a given mipmap level. - u32 GetMipHeight(u32 level) const; + u32 GetMipHeight(u32 level) const { + return std::max(1U, height >> level); + } /// Returns the depth of a given mipmap level. - u32 GetMipDepth(u32 level) const; + u32 GetMipDepth(u32 level) const { + return is_layered ? depth : std::max(1U, depth >> level); + } /// Returns the block height of a given mipmap level. 
u32 GetMipBlockHeight(u32 level) const; @@ -79,6 +93,12 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; + // Helper used for out of class size calculations + static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, + const u32 block_depth) { + return Common::AlignUp(out_size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + } + /// Returns the offset in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapLevelOffset(u32 level) const; @@ -98,16 +118,24 @@ public: std::size_t GetHostLayerSize(u32 level) const; /// Returns the default block width. - u32 GetDefaultBlockWidth() const; + u32 GetDefaultBlockWidth() const { + return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); + } /// Returns the default block height. - u32 GetDefaultBlockHeight() const; + u32 GetDefaultBlockHeight() const { + return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); + } /// Returns the bits per pixel. - u32 GetBitsPerPixel() const; + u32 GetBitsPerPixel() const { + return VideoCore::Surface::GetFormatBpp(pixel_format); + } /// Returns the bytes per pixel. - u32 GetBytesPerPixel() const; + u32 GetBytesPerPixel() const { + return VideoCore::Surface::GetBytesPerPixel(pixel_format); + } /// Returns true if the pixel format is a depth and/or stencil format. 
bool IsPixelFormatZeta() const; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 96d108147..fbfd1ff0b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -120,10 +120,6 @@ public: return {}; } - if (regs.color_mask[index].raw == 0) { - return {}; - } - auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) @@ -165,7 +161,9 @@ public: const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) { - ImageBlit(GetFermiSurface(src_config), GetFermiSurface(dst_config), src_rect, dst_rect); + TSurface dst_surface = GetFermiSurface(dst_config); + ImageBlit(GetFermiSurface(src_config), dst_surface, src_rect, dst_rect); + dst_surface->MarkAsModified(true, Tick()); } TSurface TryFindFramebufferSurface(const u8* host_ptr) { @@ -270,10 +268,6 @@ private: RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { - // Untopological decision - if (untopological) { - return RecycleStrategy::Ignore; - } // 3D Textures decision if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -284,12 +278,16 @@ private: return RecycleStrategy::Flush; } } + // Untopological decision + if (untopological) { + return RecycleStrategy::Ignore; + } return RecycleStrategy::Ignore; } std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, - const u8* host_ptr, const bool preserve_contents, + const bool preserve_contents, const bool untopological) { for (auto surface : overlaps) { Unregister(surface); @@ -328,6 +326,7 @@ private: } Unregister(current_surface); Register(new_surface); + new_surface->MarkAsModified(current_surface->IsModified(), Tick()); 
return {new_surface, new_surface->GetMainView()}; } @@ -351,6 +350,7 @@ private: if (params.target == SurfaceTarget::Texture3D) { return {}; } + bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); for (auto surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); @@ -358,7 +358,7 @@ private: // We send this cases to recycle as they are more complex to handle return {}; } - const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); + const std::size_t candidate_size = surface->GetSizeInBytes(); auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { return {}; @@ -368,6 +368,7 @@ private: if (new_surface->GetMipmapSize(mipmap) != candidate_size) { return {}; } + modified |= surface->IsModified(); // Now we got all the data set up const u32 dst_width{params.GetMipWidth(mipmap)}; const u32 dst_height{params.GetMipHeight(mipmap)}; @@ -381,6 +382,7 @@ private: force_reconfiguration |= surface->IsProtected(); Unregister(surface, true); } + new_surface->MarkAsModified(modified, Tick()); Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; } @@ -399,8 +401,7 @@ private: for (auto surface : overlaps) { if (!surface->MatchesTopology(params)) { - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - true); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); } } @@ -418,27 +419,26 @@ private: } } if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } - std::optional view = current_surface->EmplaceView(params, gpu_addr); + std::optional view = + current_surface->EmplaceView(params, gpu_addr, candidate_size); if (view.has_value()) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if 
(is_mirage) { LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } return {current_surface, *view}; } - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } else { std::optional> view = ReconstructSurface(overlaps, params, gpu_addr, host_ptr); if (view.has_value()) { return *view; } - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } } -- cgit v1.2.3 From 28d7c2f5a5089051410d37a03d5a4a42e4230842 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 01:10:16 -0300 Subject: texture_cache: Change internal cache from lists to vectors --- src/video_core/texture_cache/texture_cache.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fbfd1ff0b..1c2b63dae 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,11 +4,11 @@ #pragma once -#include #include #include #include #include +#include #include #include @@ -172,7 +172,7 @@ public: return nullptr; } const CacheAddr page = cache_addr >> registry_page_bits; - std::list& list = registry[page]; + std::vector& list = registry[page]; for (auto& s : list) { if (s->GetCacheAddr() == cache_addr) { return s; @@ -482,7 +482,8 @@ private: CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - registry[start].remove(surface); + auto& reg{registry[start]}; + reg.erase(std::find(reg.begin(), 
reg.end(), surface)); start++; } } @@ -496,7 +497,7 @@ private: const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; std::vector surfaces; while (start <= end) { - std::list& list = registry[start]; + std::vector& list = registry[start]; for (auto& s : list) { if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { s->MarkAsPicked(true); @@ -553,12 +554,12 @@ private: // large in size. static constexpr u64 registry_page_bits{20}; static constexpr u64 registry_page_size{1 << registry_page_bits}; - std::unordered_map> registry; + std::unordered_map> registry; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. - std::unordered_map> surface_reserve; + std::unordered_map> surface_reserve; std::array render_targets; DepthBufferInfo depth_buffer; -- cgit v1.2.3 From 345e73f2feb0701e3c3099d002a1c21fb524eae4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 04:17:48 -0300 Subject: video_core: Use un-shifted block sizes to avoid integer divisions Instead of storing all block width, height and depths in their shifted form: block_width = 1U << block_shift; Store them like they are provided by the emulated hardware (their block_shift form). This way we can avoid doing the costly Common::AlignUp operation to align texture sizes and drop CPU integer divisions with bitwise logic (defined in Common::AlignBits). 
--- src/common/alignment.h | 5 +++ src/video_core/engines/fermi_2d.h | 9 ++-- src/video_core/engines/maxwell_dma.h | 4 +- src/video_core/texture_cache/surface_base.cpp | 3 +- src/video_core/texture_cache/surface_params.cpp | 39 +++++++++--------- src/video_core/texture_cache/surface_params.h | 7 ++-- src/video_core/texture_cache/texture_cache.h | 3 ++ src/video_core/textures/decoders.cpp | 55 ++++++++++++++++--------- src/video_core/textures/decoders.h | 4 +- src/video_core/textures/texture.h | 9 ++-- 10 files changed, 78 insertions(+), 60 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/common/alignment.h b/src/common/alignment.h index d94a2291f..3379a6967 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h @@ -19,6 +19,11 @@ constexpr T AlignDown(T value, std::size_t size) { return static_cast(value - value % size); } +template +constexpr T AlignBits(T value, T align) { + return (value + ((1 << align) - 1)) >> align << align; +} + template constexpr bool Is4KBAligned(T value) { static_assert(std::is_unsigned_v, "T must be an unsigned value."); diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 45f59a4d9..3d28afa91 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -63,18 +63,15 @@ public: } u32 BlockWidth() const { - // The block width is stored in log2 format. - return 1 << block_width; + return block_width; } u32 BlockHeight() const { - // The block height is stored in log2 format. - return 1 << block_height; + return block_height; } u32 BlockDepth() const { - // The block depth is stored in log2 format. 
- return 1 << block_depth; + return block_depth; } }; static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index e5942f671..522fa97dc 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -59,11 +59,11 @@ public: }; u32 BlockHeight() const { - return 1 << block_height; + return block_height; } u32 BlockDepth() const { - return 1 << block_depth; + return block_depth; } }; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 36ca72b4a..510d1aef5 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -22,7 +22,6 @@ SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) : params{params}, mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ params.GetHostSizeInBytes()} { - std::size_t offset = 0; for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; @@ -75,7 +74,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, return; } if (params.is_tiled) { - ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", + ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index b537b26e2..3a47f404d 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -96,9 +96,9 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( SurfaceParams params; 
params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; params.srgb_conversion = false; - params.block_width = 1 << std::min(block_width, 5U); - params.block_height = 1 << std::min(block_height, 5U); - params.block_depth = 1 << std::min(block_depth, 5U); + params.block_width = std::min(block_width, 5U); + params.block_height = std::min(block_height, 5U); + params.block_depth = std::min(block_depth, 5U); params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromDepthFormat(format); params.component_type = ComponentTypeFromDepthFormat(format); @@ -120,9 +120,9 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.block_width = 1 << config.memory_layout.block_width; - params.block_height = 1 << config.memory_layout.block_height; - params.block_depth = 1 << config.memory_layout.block_depth; + params.block_width = config.memory_layout.block_width; + params.block_height = config.memory_layout.block_height; + params.block_depth = config.memory_layout.block_depth; params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.component_type = ComponentTypeFromRenderTarget(config.format); @@ -149,9 +149,9 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.is_tiled = !config.linear; params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, - params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, - params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, + params.block_width = params.is_tiled ? 
std::min(config.BlockWidth(), 5U) : 0, + params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0, + params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0, params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.component_type = ComponentTypeFromRenderTarget(config.format); @@ -190,9 +190,9 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { const u32 height{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; - u32 block_height = 16; - while (block_height > 1 && blocks_in_y <= block_height * 4) { - block_height >>= 1; + u32 block_height = 4; + while (block_height > 0 && blocks_in_y <= (1U << block_height) * 4) { + --block_height; } return block_height; } @@ -202,17 +202,17 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { return this->block_depth; } if (is_layered) { - return 1; + return 0; } const u32 depth{GetMipDepth(level)}; - u32 block_depth = 32; - while (block_depth > 1 && depth * 2 <= block_depth) { - block_depth >>= 1; + u32 block_depth = 5; + while (block_depth > 0 && depth * 2 <= (1U << block_depth)) { + --block_depth; } - if (block_depth == 32 && GetMipBlockHeight(level) >= 4) { - return 16; + if (block_depth == 5 && GetMipBlockHeight(level) >= 2) { + return 4; } return block_depth; @@ -252,7 +252,8 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); } if (is_tiled && is_layered) { - return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + return Common::AlignBits(size, + Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); } return size; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index e0ec1be0e..7c48782c7 100644 --- 
a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -54,12 +54,12 @@ public: constexpr std::size_t rgb8_bpp = 4ULL; // ASTC is uncompressed in software, in emulated as RGBA8 host_size_in_bytes = 0; - for (std::size_t level = 0; level < num_levels; level++) { + for (u32 level = 0; level < num_levels; ++level) { const std::size_t width = Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); const std::size_t height = Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); - const std::size_t depth = is_layered ? depth : GetMipDepth(level); + const std::size_t depth = is_layered ? this->depth : GetMipDepth(level); host_size_in_bytes += width * height * depth * rgb8_bpp; } } else { @@ -96,7 +96,8 @@ public: // Helper used for out of class size calculations static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, const u32 block_depth) { - return Common::AlignUp(out_size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + return Common::AlignBits(out_size, + Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); } /// Returns the offset in bytes in guest memory of a given mipmap level. 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1c2b63dae..f35d0c88f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -81,6 +81,9 @@ public: if (!gpu_addr) { return {}; } + if (gpu_addr == 0x1b7ec0000) { + // __debugbreak(); + } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; return GetSurface(gpu_addr, params, true).second; } diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 217805386..f45fd175a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -36,10 +36,16 @@ struct alignas(64) SwizzleTable { std::array, N> values{}; }; -constexpr u32 gob_size_x = 64; -constexpr u32 gob_size_y = 8; -constexpr u32 gob_size_z = 1; -constexpr u32 gob_size = gob_size_x * gob_size_y * gob_size_z; +constexpr u32 gob_size_x_shift = 6; +constexpr u32 gob_size_y_shift = 3; +constexpr u32 gob_size_z_shift = 0; +constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift; + +constexpr u32 gob_size_x = 1U << gob_size_x_shift; +constexpr u32 gob_size_y = 1U << gob_size_y_shift; +constexpr u32 gob_size_z = 1U << gob_size_z_shift; +constexpr u32 gob_size = 1U << gob_size_shift; + constexpr u32 fast_swizzle_align = 16; constexpr auto legacy_swizzle_table = SwizzleTable(); @@ -171,14 +177,16 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { + const u32 block_height_size{1U << block_height}; + const u32 block_depth_size{1U << block_depth}; if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { SwizzledData(swizzled_data, unswizzled_data, 
unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, - width_spacing); + bytes_per_pixel, out_bytes_per_pixel, block_height_size, + block_depth_size, width_spacing); } else { SwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, - width_spacing); + bytes_per_pixel, out_bytes_per_pixel, block_height_size, + block_depth_size, width_spacing); } } @@ -249,16 +257,18 @@ std::vector UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { + const u32 block_height_size{1U << block_height}; const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / gob_size_x}; for (u32 line = 0; line < subrect_height; ++line) { const u32 gob_address_y = - (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + - ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; + (line / (gob_size_y * block_height_size)) * gob_size * block_height_size * + image_width_in_gobs + + ((line % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; const auto& table = legacy_swizzle_table[line % gob_size_y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 gob_address = - gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; + gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height_size; const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; u8* dest_addr = swizzled_data + swizzled_offset; @@ -271,14 +281,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, 
u32 swizzled_width, u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, u32 offset_x, u32 offset_y) { + const u32 block_height_size{1U << block_height}; for (u32 line = 0; line < subrect_height; ++line) { const u32 y2 = line + offset_y; - const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + - ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size; + const u32 gob_address_y = + (y2 / (gob_size_y * block_height_size)) * gob_size * block_height_size + + ((y2 % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; const auto& table = legacy_swizzle_table[y2 % gob_size_y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 x2 = (x + offset_x) * bytes_per_pixel; - const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; + const u32 gob_address = + gob_address_y + (x2 / gob_size_x) * gob_size * block_height_size; const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; u8* source_addr = swizzled_data + swizzled_offset; @@ -291,16 +304,18 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, const u32 block_height, const std::size_t copy_size, const u8* source_data, u8* swizzle_data) { + const u32 block_height_size{1U << block_height}; const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; std::size_t count = 0; for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { const std::size_t gob_address_y = - (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + - ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; + (y / (gob_size_y * block_height_size)) * gob_size * block_height_size * + image_width_in_gobs + + ((y % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; const auto& table = 
legacy_swizzle_table[y % gob_size_y]; for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { const std::size_t gob_address = - gob_address_y + (x / gob_size_x) * gob_size * block_height; + gob_address_y + (x / gob_size_x) * gob_size * block_height_size; const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; const u8* source_line = source_data + count; u8* dest_addr = swizzle_data + swizzled_offset; @@ -356,9 +371,9 @@ std::vector DecodeTexture(const std::vector& texture_data, TextureFormat std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth) { if (tiled) { - const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gob_size_x); - const u32 aligned_height = Common::AlignUp(height, gob_size_y * block_height); - const u32 aligned_depth = Common::AlignUp(depth, gob_size_z * block_depth); + const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift); + const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height); + const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth); return aligned_width * aligned_height * aligned_depth; } else { return width * height * depth * bytes_per_pixel; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e072d8401..eaec9b5a5 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -12,8 +12,8 @@ namespace Tegra::Texture { // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents // an small rect of (64/bytes_per_pixel)X8. -inline std::size_t GetGOBSize() { - return 512; +inline std::size_t GetGOBSizeShift() { + return 9; } /// Unswizzles a swizzled texture without changing its format. 
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 219bfd559..f22b4e7c7 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -219,20 +219,17 @@ struct TICEntry { u32 BlockWidth() const { ASSERT(IsTiled()); - // The block height is stored in log2 format. - return 1 << block_width; + return block_width; } u32 BlockHeight() const { ASSERT(IsTiled()); - // The block height is stored in log2 format. - return 1 << block_height; + return block_height; } u32 BlockDepth() const { ASSERT(IsTiled()); - // The block height is stored in log2 format. - return 1 << block_depth; + return block_depth; } bool IsTiled() const { -- cgit v1.2.3 From a4a58be2d46e95df4cead2916b6efbd658a0deaa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 17:59:18 -0400 Subject: texture_cache: Implement L1_Inner_cache --- src/video_core/texture_cache/texture_cache.h | 43 +++++++++++++++++++--------- 1 file changed, 30 insertions(+), 13 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f35d0c88f..ad0fbd7ce 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -395,6 +395,26 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + + if (l1_cache.count(cache_addr) > 0) { + TSurface current_surface = l1_cache[cache_addr]; + if (!current_surface->MatchesTopology(params)) { + std::vector overlaps{current_surface}; + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + } + MatchStructureResult s_result = current_surface->MatchesStructure(params); + if (s_result != MatchStructureResult::None && + current_surface->GetGpuAddr() == gpu_addr && + (params.target != SurfaceTarget::Texture3D || + current_surface->MatchTarget(params.target))) { + if (s_result == 
MatchStructureResult::FullMatch) { + return ManageStructuralMatch(current_surface, params); + } else { + return RebuildSurface(current_surface, params); + } + } + } + const std::size_t candidate_size = params.GetGuestSizeInBytes(); auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; @@ -410,17 +430,6 @@ private: if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; - MatchStructureResult s_result = current_surface->MatchesStructure(params); - if (s_result != MatchStructureResult::None && - current_surface->GetGpuAddr() == gpu_addr && - (params.target != SurfaceTarget::Texture3D || - current_surface->MatchTarget(params.target))) { - if (s_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params); - } else { - return RebuildSurface(current_surface, params); - } - } if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } @@ -473,8 +482,10 @@ private: } void RegisterInnerCache(TSurface& surface) { - CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr cache_addr = surface->GetCacheAddr(); + CacheAddr start = cache_addr >> registry_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; + l1_cache[cache_addr] = surface; while (start <= end) { registry[start].push_back(surface); start++; @@ -482,8 +493,10 @@ private: } void UnregisterInnerCache(TSurface& surface) { - CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr cache_addr = surface->GetCacheAddr(); + CacheAddr start = cache_addr >> registry_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; + l1_cache.erase(cache_addr); while (start <= end) { auto& reg{registry[start]}; reg.erase(std::find(reg.begin(), reg.end(), surface)); @@ -559,6 +572,10 @@ private: static constexpr u64 registry_page_size{1 << registry_page_bits}; 
std::unordered_map> registry; + // The L1 Cache is used for fast texture lookup before checking the overlaps + // This avoids calculating size and other stuffs. + std::unordered_map l1_cache; + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. -- cgit v1.2.3 From 94f2be5473182789ec3f6388b43fcd708a505500 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 22:12:35 -0400 Subject: texture_cache: Optimize GetMipBlockHeight and GetMipBlockDepth --- src/common/bit_util.h | 44 +++++++++++++++++++++++++ src/video_core/texture_cache/surface_params.cpp | 19 ++++------- 2 files changed, 50 insertions(+), 13 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/common/bit_util.h b/src/common/bit_util.h index d032df413..6f7d5a947 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -97,4 +97,48 @@ inline u32 CountTrailingZeroes64(u64 value) { } #endif +#ifdef _MSC_VER + +inline u32 MostSignificantBit32(const u32 value) { + unsigned long result; + _BitScanReverse(&result, value); + return static_cast(result); +} + +inline u32 MostSignificantBit64(const u64 value) { + unsigned long result; + _BitScanReverse64(&result, value); + return static_cast(result); +} + +#else + +inline u32 MostSignificantBit32(const u32 value) { + return 31U - static_cast(__builtin_clz(value)); +} + +inline u32 MostSignificantBit64(const u64 value) { + return 63U - static_cast(__builtin_clzll(value)); +} + +#endif + +inline u32 Log2Floor32(const u32 value) { + return MostSignificantBit32(value); +} + +inline u32 Log2Ceil32(const u32 value) { + const u32 log2_f = Log2Floor32(value); + return log2_f + ((value ^ (1U << log2_f)) != 0U); +} + +inline u32 Log2Floor64(const u64 value) { + return MostSignificantBit64(value); +} + +inline u32 Log2Ceil64(const u64 value) { + 
const u64 log2_f = static_cast(Log2Floor64(value)); + return static_cast(log2_f + ((value ^ (1ULL << log2_f)) != 0ULL)); +} + } // namespace Common diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 3a47f404d..e7e671d8c 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -5,6 +5,7 @@ #include #include "common/alignment.h" +#include "common/bit_util.h" #include "common/cityhash.h" #include "core/core.h" #include "video_core/engines/shader_bytecode.h" @@ -190,11 +191,8 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { const u32 height{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; - u32 block_height = 4; - while (block_height > 0 && blocks_in_y <= (1U << block_height) * 4) { - --block_height; - } - return block_height; + const u32 block_height = Common::Log2Ceil32(blocks_in_y); + return std::clamp(block_height, 3U, 8U) - 3U; } u32 SurfaceParams::GetMipBlockDepth(u32 level) const { @@ -206,15 +204,10 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { } const u32 depth{GetMipDepth(level)}; - u32 block_depth = 5; - while (block_depth > 0 && depth * 2 <= (1U << block_depth)) { - --block_depth; - } - - if (block_depth == 5 && GetMipBlockHeight(level) >= 2) { - return 4; + const u32 block_depth = Common::Log2Ceil32(depth); + if (block_depth > 4) { + return 5 - (GetMipBlockHeight(level) >= 2); } - return block_depth; } -- cgit v1.2.3 From 5192521dc3f752c385de356158706899f523e498 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 22:26:46 -0400 Subject: texture_cache: Implement GPU Dirty Flags --- src/video_core/texture_cache/texture_cache.h | 37 +++++++++++++++++----------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'src/video_core/texture_cache') diff --git 
a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ad0fbd7ce..8aa0d6515 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -81,17 +81,22 @@ public: if (!gpu_addr) { return {}; } - if (gpu_addr == 0x1b7ec0000) { - // __debugbreak(); - } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; return GetSurface(gpu_addr, params, true).second; } TView GetDepthBufferSurface(bool preserve_contents) { - const auto& regs{system.GPU().Maxwell3D().regs}; + auto& maxwell3d = system.GPU().Maxwell3D(); + + if (!maxwell3d.dirty_flags.zeta_buffer) { + return depth_buffer.view; + } + maxwell3d.dirty_flags.zeta_buffer = false; + + const auto& regs{maxwell3d.regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { + SetEmptyDepthBuffer(); return {}; } const auto depth_params{SurfaceParams::CreateForDepthBuffer( @@ -101,6 +106,8 @@ public: auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); if (depth_buffer.target) depth_buffer.target->MarkAsProtected(false); + depth_buffer.target = surface_view.first; + depth_buffer.view = surface_view.second; if (depth_buffer.target) depth_buffer.target->MarkAsProtected(true); return surface_view.second; @@ -108,8 +115,13 @@ public: TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); + auto& maxwell3d = system.GPU().Maxwell3D(); + if (!maxwell3d.dirty_flags.color_buffer[index]) { + return render_targets[index].view; + } + maxwell3d.dirty_flags.color_buffer.reset(index); - const auto& regs{system.GPU().Maxwell3D().regs}; + const auto& regs{maxwell3d.regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { SetEmptyColorBuffer(index); @@ -128,6 +140,7 @@ public: if (render_targets[index].target) 
render_targets[index].target->MarkAsProtected(false); render_targets[index].target = surface_view.first; + render_targets[index].view = surface_view.second; if (render_targets[index].target) render_targets[index].target->MarkAsProtected(true); return surface_view.second; @@ -154,7 +167,6 @@ public: void SetEmptyColorBuffer(std::size_t index) { if (render_targets[index].target != nullptr) { render_targets[index].target->MarkAsProtected(false); - std::memset(&render_targets[index].config, sizeof(RenderTargetConfig), 0); render_targets[index].target = nullptr; render_targets[index].view = nullptr; } @@ -545,13 +557,7 @@ private: return {}; } - struct RenderInfo { - RenderTargetConfig config; - TSurface target; - TView view; - }; - - struct DepthBufferInfo { + struct FramebufferTargetInfo { TSurface target; TView view; }; @@ -580,8 +586,9 @@ private: /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. 
std::unordered_map> surface_reserve; - std::array render_targets; - DepthBufferInfo depth_buffer; + std::array + render_targets; + FramebufferTargetInfo depth_buffer; std::vector staging_buffer; }; -- cgit v1.2.3 From 1bbc9debfbcbd960874e2f877604506d174f613c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 23:42:08 -0400 Subject: Remove Framebuffer reconfiguration and restrict rendertarget protection --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 16 +++++-------- src/video_core/renderer_opengl/gl_rasterizer.h | 5 ++-- src/video_core/texture_cache/surface_base.h | 15 ++++++++---- src/video_core/texture_cache/texture_cache.h | 30 +++++++----------------- 4 files changed, 27 insertions(+), 39 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3baf1522d..2d6fd154a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -461,15 +461,15 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, } std::pair RasterizerOpenGL::ConfigureFramebuffers( - OpenGLState& current_state, bool must_reconfigure, bool using_color_fb, bool using_depth_fb, - bool preserve_contents, std::optional single_color_target) { + OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, + std::optional single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, single_color_target}; - if (!must_reconfigure && fb_config_state == current_framebuffer_config_state && + if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { // Only skip if the previous ConfigureFramebuffers call was from the 
same kind (multiple or // single color targets). This is done because the guest registers may not change but the @@ -622,9 +622,8 @@ void RasterizerOpenGL::Clear() { return; } - const auto [clear_depth, clear_stencil] = - ConfigureFramebuffers(clear_state, false, use_color, use_depth || use_stencil, false, - regs.clear_buffers.RT.Value()); + const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( + clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); if (regs.clear_flags.scissor) { SyncScissorTest(clear_state); } @@ -659,7 +658,6 @@ void RasterizerOpenGL::DrawArrays() { auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; - ConfigureFramebuffers(state); SyncColorMask(); SyncFragmentColorClampState(); SyncMultiSampleState(); @@ -706,9 +704,7 @@ void RasterizerOpenGL::DrawArrays() { DrawParameters params = SetupDraw(); SetupShaders(params.primitive_mode); - if (texture_cache.ConsumeReconfigurationFlag()) { - ConfigureFramebuffers(state, true); - } + ConfigureFramebuffers(state); buffer_cache.Unmap(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 970637efa..be5ac1b9f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -111,9 +111,8 @@ private: * (requires using_depth_fb to be true) */ std::pair ConfigureFramebuffers( - OpenGLState& current_state, bool must_reconfigure = false, bool use_color_fb = true, - bool using_depth_fb = true, bool preserve_contents = true, - std::optional single_color_target = {}); + OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true, + bool preserve_contents = true, std::optional single_color_target = {}); /// Configures the current constbuffers to use for the draw command. 
void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 095deb602..78db2d665 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -218,12 +218,12 @@ public: virtual void DownloadTexture(std::vector& staging_buffer) = 0; void MarkAsModified(const bool is_modified_, const u64 tick) { - is_modified = is_modified_ || is_protected; + is_modified = is_modified_ || is_target; modification_tick = tick; } - void MarkAsProtected(const bool is_protected) { - this->is_protected = is_protected; + void MarkAsRenderTarget(const bool is_target) { + this->is_target = is_target; } void MarkAsPicked(const bool is_picked) { @@ -235,7 +235,12 @@ public: } bool IsProtected() const { - return is_protected; + // Only 3D Slices are to be protected + return is_target && params.block_depth > 0; + } + + bool IsRenderTarget() const { + return is_target; } bool IsRegistered() const { @@ -307,7 +312,7 @@ private: } bool is_modified{}; - bool is_protected{}; + bool is_target{}; bool is_registered{}; bool is_picked{}; u64 modification_tick{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8aa0d6515..4ac5668c8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -105,11 +105,11 @@ public: regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); if (depth_buffer.target) - depth_buffer.target->MarkAsProtected(false); + depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target = surface_view.first; depth_buffer.view = surface_view.second; if (depth_buffer.target) - depth_buffer.target->MarkAsProtected(true); + depth_buffer.target->MarkAsRenderTarget(true); return surface_view.second; } @@ -138,11 
+138,11 @@ public: auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) - render_targets[index].target->MarkAsProtected(false); + render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target = surface_view.first; render_targets[index].view = surface_view.second; if (render_targets[index].target) - render_targets[index].target->MarkAsProtected(true); + render_targets[index].target->MarkAsRenderTarget(true); return surface_view.second; } @@ -158,7 +158,7 @@ public: void SetEmptyDepthBuffer() { if (depth_buffer.target != nullptr) { - depth_buffer.target->MarkAsProtected(false); + depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target = nullptr; depth_buffer.view = nullptr; } @@ -166,7 +166,7 @@ public: void SetEmptyColorBuffer(std::size_t index) { if (render_targets[index].target != nullptr) { - render_targets[index].target->MarkAsProtected(false); + render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target = nullptr; render_targets[index].view = nullptr; } @@ -200,12 +200,6 @@ public: return ++ticks; } - bool ConsumeReconfigurationFlag() { - const bool result = force_reconfiguration; - force_reconfiguration = false; - return result; - } - protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { @@ -242,8 +236,8 @@ protected: rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } - void Unregister(TSurface surface, const bool force_unregister = false) { - if (surface->IsProtected() && !force_unregister) { + void Unregister(TSurface surface) { + if (surface->IsProtected()) { return; } const GPUVAddr gpu_addr = surface->GetGpuAddr(); @@ -392,10 +386,8 @@ private: std::min(src_params.height, dst_height), 1); ImageCopy(surface, new_surface, copy_params); } - force_reconfiguration = false; for (auto surface : overlaps) { - 
force_reconfiguration |= surface->IsProtected(); - Unregister(surface, true); + Unregister(surface); } new_surface->MarkAsModified(modified, Tick()); Register(new_surface); @@ -567,10 +559,6 @@ private: u64 ticks{}; - // Sometimes Setup Textures can hit a surface that's on the render target, when this happens - // we force a reconfiguration of the frame buffer after setup. - bool force_reconfiguration; - // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly // large in size. -- cgit v1.2.3 From 07cc7e0c12143a84744abb8dc03eb46eb615b308 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 23:50:01 -0400 Subject: texture_cache: Add ASync Protections --- src/video_core/texture_cache/texture_cache.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4ac5668c8..1b8ada910 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include @@ -56,12 +57,16 @@ public: } void InvalidateRegion(CacheAddr addr, std::size_t size) { + std::lock_guard lock{mutex}; + for (const auto& surface : GetSurfacesInRegion(addr, size)) { Unregister(surface); } } void FlushRegion(CacheAddr addr, std::size_t size) { + std::lock_guard lock{mutex}; + auto surfaces = GetSurfacesInRegion(addr, size); if (surfaces.empty()) { return; @@ -220,6 +225,8 @@ protected: const Common::Rectangle& dst_rect) = 0; void Register(TSurface surface) { + std::lock_guard lock{mutex}; + const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); @@ -237,6 +244,8 @@ protected: } void Unregister(TSurface surface) 
{ + std::lock_guard lock{mutex}; + if (surface->IsProtected()) { return; } @@ -579,6 +588,7 @@ private: FramebufferTargetInfo depth_buffer; std::vector staging_buffer; + std::recursive_mutex mutex; }; } // namespace VideoCommon -- cgit v1.2.3 From 2131f715730580dfeb692acdf3ae3e62ffd455c1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 23:02:14 -0300 Subject: surface_params: Optimize CreateForTexture Instead of using Common::AlignUp, use Common::AlignBits to align the texture compression factor. --- src/video_core/surface.h | 142 ++++++++++++------------ src/video_core/texture_cache/surface_params.cpp | 6 +- 2 files changed, 76 insertions(+), 72 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/surface.h b/src/video_core/surface.h index b783e4b27..8e98033f3 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -122,71 +122,71 @@ enum class SurfaceTarget { TextureCubeArray, }; -constexpr std::array compression_factor_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG32UI - 1, // R32UI - 4, // ASTC_2D_8X8 - 4, // ASTC_2D_8X5 - 4, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 4, // ASTC_2D_4X4_SRGB - 4, // ASTC_2D_8X8_SRGB - 4, // ASTC_2D_8X5_SRGB - 4, // ASTC_2D_5X4_SRGB - 4, // ASTC_2D_5X5 - 4, // ASTC_2D_5X5_SRGB - 4, // ASTC_2D_10X8 - 4, // 
ASTC_2D_10X8_SRGB - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 +inline constexpr std::array compression_factor_shift_table = {{ + 0, // ABGR8U + 0, // ABGR8S + 0, // ABGR8UI + 0, // B5G6R5U + 0, // A2B10G10R10U + 0, // A1B5G5R5U + 0, // R8U + 0, // R8UI + 0, // RGBA16F + 0, // RGBA16U + 0, // RGBA16UI + 0, // R11FG11FB10F + 0, // RGBA32UI + 2, // DXT1 + 2, // DXT23 + 2, // DXT45 + 2, // DXN1 + 2, // DXN2UNORM + 2, // DXN2SNORM + 2, // BC7U + 2, // BC6H_UF16 + 2, // BC6H_SF16 + 2, // ASTC_2D_4X4 + 0, // BGRA8 + 0, // RGBA32F + 0, // RG32F + 0, // R32F + 0, // R16F + 0, // R16U + 0, // R16S + 0, // R16UI + 0, // R16I + 0, // RG16 + 0, // RG16F + 0, // RG16UI + 0, // RG16I + 0, // RG16S + 0, // RGB32F + 0, // RGBA8_SRGB + 0, // RG8U + 0, // RG8S + 0, // RG32UI + 0, // R32UI + 2, // ASTC_2D_8X8 + 2, // ASTC_2D_8X5 + 2, // ASTC_2D_5X4 + 0, // BGRA8_SRGB + 2, // DXT1_SRGB + 2, // DXT23_SRGB + 2, // DXT45_SRGB + 2, // BC7U_SRGB + 2, // ASTC_2D_4X4_SRGB + 2, // ASTC_2D_8X8_SRGB + 2, // ASTC_2D_8X5_SRGB + 2, // ASTC_2D_5X4_SRGB + 2, // ASTC_2D_5X5 + 2, // ASTC_2D_5X5_SRGB + 2, // ASTC_2D_10X8 + 2, // ASTC_2D_10X8_SRGB + 0, // Z32F + 0, // Z16 + 0, // Z24S8 + 0, // S8Z24 + 0, // Z32FS8 }}; /** @@ -195,12 +195,14 @@ constexpr std::array compression_factor_table = {{ * compressed image. This is used for maintaining proper surface sizes for compressed * texture formats. 
*/ -static constexpr u32 GetCompressionFactor(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; +inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { + DEBUG_ASSERT(format != PixelFormat::Invalid); + DEBUG_ASSERT(static_cast(format) < compression_factor_table.size()); + return compression_factor_shift_table[static_cast(format)]; +} - ASSERT(static_cast(format) < compression_factor_table.size()); - return compression_factor_table[static_cast(format)]; +inline constexpr u32 GetCompressionFactor(PixelFormat format) { + return 1U << GetCompressionFactorShift(format); } constexpr std::array block_width_table = {{ diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index e7e671d8c..6f39f8468 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -76,8 +76,10 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. 
params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); - params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); - params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); + params.width = + Common::AlignBits(config.tic.Width(), GetCompressionFactorShift(params.pixel_format)); + params.height = + Common::AlignBits(config.tic.Height(), GetCompressionFactorShift(params.pixel_format)); params.depth = config.tic.Depth(); if (params.target == SurfaceTarget::TextureCubemap || params.target == SurfaceTarget::TextureCubeArray) { -- cgit v1.2.3 From d65a4af89582f272efbbfd47d1ee78e616553312 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 11 May 2019 01:21:02 -0400 Subject: texture_cache return invalid buffer on deactivated color_mask --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 ++++-- src/video_core/texture_cache/texture_cache.h | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d6fd154a..2872dbdeb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -370,10 +370,12 @@ void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, return; if (fbkey.is_single_buffer) { - if (fbkey.color_attachments[0] != GL_NONE) { + if (fbkey.color_attachments[0] != GL_NONE && fbkey.colors[0]) { fbkey.colors[0]->Attach(fbkey.color_attachments[0]); + glDrawBuffer(fbkey.color_attachments[0]); + } else { + glDrawBuffer(GL_NONE); } - glDrawBuffer(fbkey.color_attachments[0]); } else { for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { if (fbkey.colors[index]) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1b8ada910..7058399e2 100644 --- 
a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -133,6 +133,11 @@ public: return {}; } + if (regs.color_mask[index].raw != 0) { + SetEmptyColorBuffer(index); + return {}; + } + const auto& config{regs.rt[index]}; const auto gpu_addr{config.Address()}; if (!gpu_addr) { -- cgit v1.2.3 From 9098905dd13bb68f2fe49a9590688b76cc999fdd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 11 May 2019 03:15:49 -0300 Subject: gl_framebuffer_cache: Use a hashed struct to cache framebuffers --- src/video_core/CMakeLists.txt | 2 + .../renderer_opengl/gl_framebuffer_cache.cpp | 73 ++++++++++++++++++++++ .../renderer_opengl/gl_framebuffer_cache.h | 68 ++++++++++++++++++++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 59 +---------------- src/video_core/renderer_opengl/gl_rasterizer.h | 6 +- src/video_core/texture_cache/texture_cache.h | 2 +- 6 files changed, 148 insertions(+), 62 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_framebuffer_cache.cpp create mode 100644 src/video_core/renderer_opengl/gl_framebuffer_cache.h (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 470fbceda..9d43f03d2 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -41,6 +41,8 @@ add_library(video_core STATIC renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h + renderer_opengl/gl_framebuffer_cache.cpp + renderer_opengl/gl_framebuffer_cache.h renderer_opengl/gl_global_cache.cpp renderer_opengl/gl_global_cache.h renderer_opengl/gl_rasterizer.cpp diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp new file mode 100644 index 000000000..bb9f9b81f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp @@ -0,0 +1,73 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later 
version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "common/scope_exit.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_framebuffer_cache.h" +#include "video_core/renderer_opengl/gl_state.h" + +namespace OpenGL { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default; + +FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default; + +GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) { + const auto [entry, is_cache_miss] = cache.try_emplace(key); + auto& framebuffer{entry->second}; + if (is_cache_miss) { + framebuffer = CreateFramebuffer(key); + } + return framebuffer.handle; +} + +OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) { + OGLFramebuffer framebuffer; + framebuffer.Create(); + + // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. + local_state.draw.draw_framebuffer = framebuffer.handle; + local_state.ApplyFramebufferState(); + + if (key.is_single_buffer) { + if (key.color_attachments[0] != GL_NONE && key.colors[0]) { + key.colors[0]->Attach(key.color_attachments[0]); + glDrawBuffer(key.color_attachments[0]); + } else { + glDrawBuffer(GL_NONE); + } + } else { + for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + if (key.colors[index]) { + key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast(index)); + } + } + glDrawBuffers(key.colors_count, key.color_attachments.data()); + } + + if (key.zeta) { + key.zeta->Attach(key.stencil_enable ? 
GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT); + } + + return framebuffer; +} + +std::size_t FramebufferCacheKey::Hash() const { + static_assert(sizeof(*this) % sizeof(u64) == 0, "Unaligned struct"); + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof(*this))); +} + +bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const { + return std::tie(is_single_buffer, stencil_enable, colors_count, color_attachments, colors, + zeta) == std::tie(rhs.is_single_buffer, rhs.stencil_enable, rhs.colors_count, + rhs.color_attachments, rhs.colors, rhs.zeta); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h new file mode 100644 index 000000000..a3a996353 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h @@ -0,0 +1,68 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include + +#include + +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace OpenGL { + +struct alignas(sizeof(u64)) FramebufferCacheKey { + bool is_single_buffer = false; + bool stencil_enable = false; + u16 colors_count = 0; + + std::array color_attachments{}; + std::array colors; + View zeta; + + std::size_t Hash() const; + + bool operator==(const FramebufferCacheKey& rhs) const; + + bool operator!=(const FramebufferCacheKey& rhs) const { + return !operator==(rhs); + } +}; + +} // namespace OpenGL + +namespace std { + +template <> +struct hash { + std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std + +namespace OpenGL { + +class FramebufferCacheOpenGL { +public: + FramebufferCacheOpenGL(); + ~FramebufferCacheOpenGL(); + + GLuint GetFramebuffer(const FramebufferCacheKey& key); + +private: + OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); + + OpenGLState local_state; + std::unordered_map cache; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2872dbdeb..8218c5143 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -78,26 +78,6 @@ struct DrawParameters { } }; -struct FramebufferCacheKey { - bool is_single_buffer = false; - bool stencil_enable = false; - - std::array color_attachments{}; - std::array colors{}; - u32 colors_count = 0; - - View zeta = nullptr; - - auto Tie() const { - return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count, - zeta); - } - - bool operator<(const FramebufferCacheKey& rhs) const { - return Tie() < rhs.Tie(); 
- } -}; - RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : texture_cache{system, *this}, shader_cache{*this, system, emu_window, device}, @@ -355,42 +335,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { gpu.dirty_flags.shaders = false; } -void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, - OpenGLState& current_state) { - const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey); - auto& framebuffer = entry->second; - - if (is_cache_miss) - framebuffer.Create(); - - current_state.draw.draw_framebuffer = framebuffer.handle; - current_state.ApplyFramebufferState(); - - if (!is_cache_miss) - return; - - if (fbkey.is_single_buffer) { - if (fbkey.color_attachments[0] != GL_NONE && fbkey.colors[0]) { - fbkey.colors[0]->Attach(fbkey.color_attachments[0]); - glDrawBuffer(fbkey.color_attachments[0]); - } else { - glDrawBuffer(GL_NONE); - } - } else { - for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - if (fbkey.colors[index]) { - fbkey.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast(index)); - } - } - glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data()); - } - - if (fbkey.zeta) { - fbkey.zeta->Attach(fbkey.stencil_enable ? 
GL_DEPTH_STENCIL_ATTACHMENT - : GL_DEPTH_ATTACHMENT); - } -} - std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { const auto& regs = system.GPU().Maxwell3D().regs; @@ -556,7 +500,7 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; } - SetupCachedFramebuffer(fbkey, current_state); + current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey); SyncViewport(current_state); return current_depth_stencil_usage = {static_cast(depth_surface), fbkey.stencil_enable}; @@ -638,6 +582,7 @@ void RasterizerOpenGL::Clear() { clear_state.ApplyDepth(); clear_state.ApplyStencilTest(); clear_state.ApplyViewport(); + clear_state.ApplyFramebufferState(); if (use_color) { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index be5ac1b9f..2f13d9758 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -23,6 +23,7 @@ #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_framebuffer_cache.h" #include "video_core/renderer_opengl/gl_global_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" @@ -49,7 +50,6 @@ namespace OpenGL { struct ScreenInfo; struct DrawParameters; -struct FramebufferCacheKey; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: @@ -193,6 +193,7 @@ private: ShaderCacheOpenGL shader_cache; GlobalRegionCacheOpenGL global_cache; SamplerCacheOpenGL sampler_cache; + FramebufferCacheOpenGL framebuffer_cache; Core::System& system; ScreenInfo& screen_info; @@ -203,7 +204,6 @@ private: OGLVertexArray> vertex_array_cache; - std::map framebuffer_cache; 
FramebufferConfigState current_framebuffer_config_state; std::pair current_depth_stencil_usage{}; @@ -226,8 +226,6 @@ private: void SetupShaders(GLenum primitive_mode); - void SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, OpenGLState& current_state); - enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw = AccelDraw::Disabled; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7058399e2..419c0de5e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -133,7 +133,7 @@ public: return {}; } - if (regs.color_mask[index].raw != 0) { + if (regs.color_mask[index].raw == 0) { SetEmptyColorBuffer(index); return {}; } -- cgit v1.2.3 From c2ed348bddc1cd1bd97ce789d7855b1571e45ef4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 12 May 2019 18:31:03 -0300 Subject: surface_params: Ensure pitch is always written to avoid surface leaks --- src/video_core/texture_cache/surface_params.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 6f39f8468..8472b69dc 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -111,6 +111,7 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.unaligned_height = zeta_height; params.target = SurfaceTarget::Texture2D; params.depth = 1; + params.pitch = 0; params.num_levels = 1; params.is_layered = false; return params; @@ -131,6 +132,7 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); if (params.is_tiled) { + params.pitch = 0; params.width = config.width; } else { const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; 
-- cgit v1.2.3 From 7731a0e2d15da04eea746b4b8dd5c6c4b29f9f29 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 12 May 2019 20:33:52 -0400 Subject: texture_cache: General Fixes Fixed ASTC mipmaps loading Fixed alignment on openGL upload/download Fixed Block Height Calculation Removed unalign_height --- .../renderer_opengl/gl_texture_cache.cpp | 11 ++- src/video_core/surface.h | 84 +++++++++++++++++++++- src/video_core/texture_cache/surface_base.cpp | 18 ++++- src/video_core/texture_cache/surface_base.h | 4 ++ src/video_core/texture_cache/surface_params.cpp | 52 ++++++++------ src/video_core/texture_cache/surface_params.h | 27 ++++--- src/video_core/textures/convert.cpp | 14 ++-- src/video_core/textures/convert.h | 7 +- 8 files changed, 170 insertions(+), 47 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a55097e5f..197c9f02c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -23,6 +23,7 @@ using VideoCore::MortonSwizzleMode; using VideoCore::Surface::ComponentType; using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::SurfaceCompression; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; @@ -242,10 +243,10 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Download); // TODO(Rodrigo): Optimize alignment - glPixelStorei(GL_PACK_ALIGNMENT, 1); SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.num_levels; ++level) { + glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); if (is_compressed) { @@ -270,10 +271,14 @@ void 
CachedSurface::UploadTexture(std::vector& staging_buffer) { void CachedSurface::UploadTextureMipmap(u32 level, std::vector& staging_buffer) { // TODO(Rodrigo): Optimize alignment - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); + auto compression_type = params.GetCompressionType(); + + const std::size_t mip_offset = compression_type == SurfaceCompression::Converted + ? params.GetConvertedMipmapOffset(level) + : params.GetHostMipmapLevelOffset(level); u8* buffer{staging_buffer.data() + mip_offset}; if (is_compressed) { const auto image_size{static_cast(params.GetHostMipmapSize(level))}; diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 8e98033f3..5d49214e5 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -197,7 +197,7 @@ inline constexpr std::array compression_factor_shift_table */ inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { DEBUG_ASSERT(format != PixelFormat::Invalid); - DEBUG_ASSERT(static_cast(format) < compression_factor_table.size()); + DEBUG_ASSERT(static_cast(format) < compression_factor_shift_table.size()); return compression_factor_shift_table[static_cast(format)]; } @@ -438,6 +438,88 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { return GetFormatBpp(pixel_format) / CHAR_BIT; } +enum class SurfaceCompression : u8 { + None = 0, + Compressed = 1, + Converted = 2, + Rearranged = 3, +}; + +inline constexpr std::array compression_type_table = {{ + SurfaceCompression::None, // ABGR8U + SurfaceCompression::None, // ABGR8S + SurfaceCompression::None, // ABGR8UI + SurfaceCompression::None, // B5G6R5U + SurfaceCompression::None, // A2B10G10R10U + SurfaceCompression::None, // A1B5G5R5U + SurfaceCompression::None, // R8U + SurfaceCompression::None, // R8UI + 
SurfaceCompression::None, // RGBA16F + SurfaceCompression::None, // RGBA16U + SurfaceCompression::None, // RGBA16UI + SurfaceCompression::None, // R11FG11FB10F + SurfaceCompression::None, // RGBA32UI + SurfaceCompression::Compressed, // DXT1 + SurfaceCompression::Compressed, // DXT23 + SurfaceCompression::Compressed, // DXT45 + SurfaceCompression::Compressed, // DXN1 + SurfaceCompression::Compressed, // DXN2UNORM + SurfaceCompression::Compressed, // DXN2SNORM + SurfaceCompression::Compressed, // BC7U + SurfaceCompression::Compressed, // BC6H_UF16 + SurfaceCompression::Compressed, // BC6H_SF16 + SurfaceCompression::Converted, // ASTC_2D_4X4 + SurfaceCompression::None, // BGRA8 + SurfaceCompression::None, // RGBA32F + SurfaceCompression::None, // RG32F + SurfaceCompression::None, // R32F + SurfaceCompression::None, // R16F + SurfaceCompression::None, // R16U + SurfaceCompression::None, // R16S + SurfaceCompression::None, // R16UI + SurfaceCompression::None, // R16I + SurfaceCompression::None, // RG16 + SurfaceCompression::None, // RG16F + SurfaceCompression::None, // RG16UI + SurfaceCompression::None, // RG16I + SurfaceCompression::None, // RG16S + SurfaceCompression::None, // RGB32F + SurfaceCompression::None, // RGBA8_SRGB + SurfaceCompression::None, // RG8U + SurfaceCompression::None, // RG8S + SurfaceCompression::None, // RG32UI + SurfaceCompression::None, // R32UI + SurfaceCompression::Converted, // ASTC_2D_8X8 + SurfaceCompression::Converted, // ASTC_2D_8X5 + SurfaceCompression::Converted, // ASTC_2D_5X4 + SurfaceCompression::None, // BGRA8_SRGB + SurfaceCompression::Compressed, // DXT1_SRGB + SurfaceCompression::Compressed, // DXT23_SRGB + SurfaceCompression::Compressed, // DXT45_SRGB + SurfaceCompression::Compressed, // BC7U_SRGB + SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB + SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB + 
SurfaceCompression::Converted, // ASTC_2D_5X5 + SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB + SurfaceCompression::Converted, // ASTC_2D_10X8 + SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB + SurfaceCompression::None, // Z32F + SurfaceCompression::None, // Z16 + SurfaceCompression::None, // Z24S8 + SurfaceCompression::Rearranged, // S8Z24 + SurfaceCompression::None, // Z32FS8 +}}; + +static constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { + if (format == PixelFormat::Invalid) + return SurfaceCompression::None; + + ASSERT(static_cast(format) < compression_type_table.size()); + return compression_type_table[static_cast(format)]; +} + SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); bool SurfaceTargetIsLayered(SurfaceTarget target); diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 510d1aef5..ceff51043 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -17,6 +17,7 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; +using VideoCore::Surface::SurfaceCompression; SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) : params{params}, mipmap_sizes(params.num_levels), @@ -102,9 +103,20 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; - ConvertFromGuestToHost(staging_buffer.data() + host_offset, params.pixel_format, + auto compression_type = params.GetCompressionType(); + if (compression_type == SurfaceCompression::None || + compression_type == SurfaceCompression::Compressed) + return; + + for (u32 level_up = params.num_levels; level_up > 0; --level_up) { + const u32 level = 
level_up - 1; + const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)}; + const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged + ? in_host_offset + : params.GetConvertedMipmapOffset(level); + u8* in_buffer = staging_buffer.data() + in_host_offset; + u8* out_buffer = staging_buffer.data() + out_host_offset; + ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, params.GetMipWidth(level), params.GetMipHeight(level), params.GetMipDepth(level), true, true); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 78db2d665..cb7f22706 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -93,6 +93,10 @@ public: return mipmap_sizes[level]; } + bool IsLinear() const { + return !params.is_tiled; + } + bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { return params.pixel_format == pixel_format; } diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 8472b69dc..d9d157d02 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -76,17 +76,14 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); - params.width = - Common::AlignBits(config.tic.Width(), GetCompressionFactorShift(params.pixel_format)); - params.height = - Common::AlignBits(config.tic.Height(), GetCompressionFactorShift(params.pixel_format)); + params.width = config.tic.Width(); + params.height = config.tic.Height(); params.depth = config.tic.Depth(); if (params.target == SurfaceTarget::TextureCubemap || params.target == SurfaceTarget::TextureCubeArray) { params.depth *= 6; } params.pitch = params.is_tiled ? 
0 : config.tic.Pitch(); - params.unaligned_height = config.tic.Height(); params.num_levels = config.tic.max_mip_level + 1; params.is_layered = params.IsLayered(); return params; @@ -108,7 +105,6 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.type = GetFormatType(params.pixel_format); params.width = zeta_width; params.height = zeta_height; - params.unaligned_height = zeta_height; params.target = SurfaceTarget::Texture2D; params.depth = 1; params.pitch = 0; @@ -141,7 +137,6 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz } params.height = config.height; params.depth = 1; - params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; params.num_levels = 1; params.is_layered = false; @@ -164,7 +159,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.width = config.width; params.height = config.height; params.pitch = config.pitch; - params.unaligned_height = config.height; // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters params.target = SurfaceTarget::Texture2D; params.depth = 1; @@ -185,18 +179,18 @@ bool SurfaceParams::IsLayered() const { } } +// Auto block resizing algorithm from: +// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c u32 SurfaceParams::GetMipBlockHeight(u32 level) const { - // Auto block resizing algorithm from: - // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c if (level == 0) { return this->block_height; } - const u32 height{GetMipHeight(level)}; + const u32 height_new{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; - const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; - const u32 block_height = Common::Log2Ceil32(blocks_in_y); - return std::clamp(block_height, 3U, 8U) - 3U; + const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height}; + const u32 
block_height_new = Common::Log2Ceil32(blocks_in_y); + return std::clamp(block_height_new, 3U, 7U) - 3U; } u32 SurfaceParams::GetMipBlockDepth(u32 level) const { @@ -207,12 +201,12 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { return 0; } - const u32 depth{GetMipDepth(level)}; - const u32 block_depth = Common::Log2Ceil32(depth); - if (block_depth > 4) { + const u32 depth_new{GetMipDepth(level)}; + const u32 block_depth_new = Common::Log2Ceil32(depth_new); + if (block_depth_new > 4) { return 5 - (GetMipBlockHeight(level) >= 2); } - return block_depth; + return block_depth_new; } std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { @@ -231,6 +225,14 @@ std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { return offset; } +std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const { + std::size_t offset = 0; + for (u32 i = 0; i < level; i++) { + offset += GetConvertedMipmapSize(i); + } + return offset; +} + std::size_t SurfaceParams::GetGuestMipmapSize(u32 level) const { return GetInnerMipmapMemorySize(level, false, false); } @@ -239,6 +241,14 @@ std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); } +std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { + constexpr std::size_t rgb8_bpp = 4ULL; + const std::size_t width_t = GetMipWidth(level); + const std::size_t height_t = GetMipHeight(level); + const std::size_t depth_t = is_layered ? 
depth : GetMipDepth(level); + return width_t * height_t * depth_t * rgb8_bpp; +} + std::size_t SurfaceParams::GetGuestLayerSize() const { return GetLayerSize(false, false); } @@ -287,12 +297,10 @@ std::size_t SurfaceParams::Hash() const { bool SurfaceParams::operator==(const SurfaceParams& rhs) const { return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, - height, depth, pitch, unaligned_height, num_levels, pixel_format, - component_type, type, target) == + height, depth, pitch, num_levels, pixel_format, component_type, type, target) == std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, - rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type, - rhs.type, rhs.target); + rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target); } std::string SurfaceParams::TargetName() const { diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 7c48782c7..b3082173f 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -7,6 +7,7 @@ #include #include "common/alignment.h" +#include "common/bit_util.h" #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ -16,6 +17,8 @@ namespace VideoCommon { +using VideoCore::Surface::SurfaceCompression; + class SurfaceParams { public: /// Creates SurfaceCachedParams from a texture configuration. 
@@ -50,17 +53,12 @@ public: std::size_t GetHostSizeInBytes() const { std::size_t host_size_in_bytes; - if (IsPixelFormatASTC(pixel_format)) { + if (GetCompressionType() == SurfaceCompression::Converted) { constexpr std::size_t rgb8_bpp = 4ULL; // ASTC is uncompressed in software, in emulated as RGBA8 host_size_in_bytes = 0; for (u32 level = 0; level < num_levels; ++level) { - const std::size_t width = - Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); - const std::size_t height = - Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); - const std::size_t depth = is_layered ? this->depth : GetMipDepth(level); - host_size_in_bytes += width * height * depth * rgb8_bpp; + host_size_in_bytes += GetConvertedMipmapSize(level); } } else { host_size_in_bytes = GetInnerMemorySize(true, false, false); @@ -93,6 +91,12 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; + u32 GetRowAlignment(u32 level) const { + const u32 bpp = + GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); + return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); + } + // Helper used for out of class size calculations static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, const u32 block_depth) { @@ -106,12 +110,16 @@ public: /// Returns the offset in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapLevelOffset(u32 level) const; + std::size_t GetConvertedMipmapOffset(u32 level) const; + /// Returns the size in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapSize(u32 level) const; /// Returns the size in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapSize(u32 level) const; + std::size_t GetConvertedMipmapSize(u32 level) const; + /// Returns the size of a layer in bytes in guest memory. 
std::size_t GetGuestLayerSize() const; @@ -141,6 +149,10 @@ public: /// Returns true if the pixel format is a depth and/or stencil format. bool IsPixelFormatZeta() const; + SurfaceCompression GetCompressionType() const { + return VideoCore::Surface::GetFormatCompressionType(pixel_format); + } + std::string TargetName() const; bool is_tiled; @@ -154,7 +166,6 @@ public: u32 height; u32 depth; u32 pitch; - u32 unaligned_height; u32 num_levels; VideoCore::Surface::PixelFormat pixel_format; VideoCore::Surface::ComponentType component_type; diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp index 82050bd51..f3efa7eb0 100644 --- a/src/video_core/textures/convert.cpp +++ b/src/video_core/textures/convert.cpp @@ -62,19 +62,19 @@ static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { SwapS8Z24ToZ24S8(data, width, height); } -void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, - bool convert_astc, bool convert_s8z24) { +void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width, + u32 height, u32 depth, bool convert_astc, bool convert_s8z24) { if (convert_astc && IsPixelFormatASTC(pixel_format)) { // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. 
u32 block_width{}; u32 block_height{}; std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); - const std::vector rgba8_data = - Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); - std::copy(rgba8_data.begin(), rgba8_data.end(), data); + const std::vector rgba8_data = Tegra::Texture::ASTC::Decompress( + in_data, width, height, depth, block_width, block_height); + std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { - Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height); + Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); } } @@ -90,4 +90,4 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h } } -} // namespace Tegra::Texture \ No newline at end of file +} // namespace Tegra::Texture diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h index 12542e71c..d5d6c77bb 100644 --- a/src/video_core/textures/convert.h +++ b/src/video_core/textures/convert.h @@ -12,10 +12,11 @@ enum class PixelFormat; namespace Tegra::Texture { -void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, - u32 height, u32 depth, bool convert_astc, bool convert_s8z24); +void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format, + u32 width, u32 height, u32 depth, bool convert_astc, + bool convert_s8z24); void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, u32 height, u32 depth, bool convert_astc, bool convert_s8z24); -} // namespace Tegra::Texture \ No newline at end of file +} // namespace Tegra::Texture -- cgit v1.2.3 From a79831d9d02f7c42d82ea36210cac7952a3ef16e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 13 May 2019 19:14:02 -0400 Subject: texture_cache: Implement Guard mechanism --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 ++++ 
src/video_core/texture_cache/texture_cache.h | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8218c5143..afacc3fbd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -424,6 +424,8 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( } current_framebuffer_config_state = fb_config_state; + texture_cache.Guard(true); + View depth_surface{}; if (using_depth_fb) { depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); @@ -500,6 +502,8 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; } + texture_cache.Guard(false); + current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey); SyncViewport(current_state); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 419c0de5e..2ad6210dd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -64,6 +64,10 @@ public: } } + void Guard(bool new_guard) { + guard_cache = new_guard; + } + void FlushRegion(CacheAddr addr, std::size_t size) { std::lock_guard lock{mutex}; @@ -251,7 +255,7 @@ protected: void Unregister(TSurface surface) { std::lock_guard lock{mutex}; - if (surface->IsProtected()) { + if (guard_cache && surface->IsProtected()) { return; } const GPUVAddr gpu_addr = surface->GetGpuAddr(); @@ -573,6 +577,9 @@ private: u64 ticks{}; + // Guards the cache for protection conflicts. + bool guard_cache{}; + // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly // large in size. 
-- cgit v1.2.3 From 4530511ee4dfc92ddbfed7f91978f332be517c90 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 13 May 2019 21:35:32 -0400 Subject: texture_cache: Try to Reconstruct Surface on bigger than overlap. This fixes clouds in SMO Cap Kingdom and lens on Cloud Kingdom. Also moved accurate_gpu setting check to Pick Strategy --- src/video_core/texture_cache/texture_cache.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2ad6210dd..38b56475f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -295,6 +295,9 @@ private: RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { + if (Settings::values.use_accurate_gpu_emulation) { + return RecycleStrategy::Flush; + } // 3D Textures decision if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -319,10 +322,7 @@ private: for (auto surface : overlaps) { Unregister(surface); } - RecycleStrategy strategy = !Settings::values.use_accurate_gpu_emulation - ? 
PickStrategy(overlaps, params, gpu_addr, untopological) - : RecycleStrategy::Flush; - switch (strategy) { + switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { case RecycleStrategy::Ignore: { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -453,6 +453,13 @@ private: if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { + if (current_surface->GetGpuAddr() == gpu_addr) { + std::optional> view = + ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + if (view.has_value()) { + return *view; + } + } return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } std::optional view = -- cgit v1.2.3 From 6162cb922e67c6c529fb17a91da726fdf3444a50 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 13 May 2019 22:59:18 -0400 Subject: texture_cache: Document the most important methods. --- src/video_core/texture_cache/texture_cache.h | 95 +++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 8 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 38b56475f..04e9528b8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -64,6 +64,10 @@ public: } } + /** + * `Guard` guarantees that rendertargets don't unregister themselves if they + * collide. Protection is currently only done on 3D slices. + **/ void Guard(bool new_guard) { guard_cache = new_guard; } @@ -293,6 +297,14 @@ private: BufferCopy = 3, }; + /** + * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. + * @param overlaps, the overlapping surfaces registered in the cache. + * @param params, the parameters on the new surface. + * @param gpu_addr, the starting address of the new surface. 
+ * @param untopological, tells the recycler that the texture has no way to match the overlaps + * due to topological reasons. + **/ RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { if (Settings::values.use_accurate_gpu_emulation) { @@ -315,6 +327,18 @@ private: return RecycleStrategy::Ignore; } + /** + * `RecycleSurface` is a method we use to decide what to do with textures we can't resolve in + *the cache. It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the + *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the + *new surface from that data. + * @param overlaps, the overlapping surfaces registered in the cache. + * @param params, the parameters on the new surface. + * @param gpu_addr, the starting address of the new surface. + * @param preserve_contents, tells if the new surface should be loaded from memory or left blank + * @param untopological, tells the recycler that the texture has no way to match the overlaps + * due to topological reasons. + **/ std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, @@ -343,6 +367,12 @@ private: } } + /** + * `RebuildSurface` this method takes a single surface and recreates it into another that + * may differ in format, target or width alignment. + * @param current_surface, the registered surface in the cache which we want to convert. + * @param params, the new surface params which we'll use to recreate the surface. 
+ **/ std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); @@ -357,6 +387,14 @@ private: return {new_surface, new_surface->GetMainView()}; } + /** + * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's + * params if it's an exact match, we return the main view of the registered surface. If its + * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats + * match but the targets don't, we create an overview View of the registered surface. + * @param current_surface, the registered surface in the cache which we want to convert. + * @param params, the new surface params which we want to check. + **/ std::pair ManageStructuralMatch(TSurface current_surface, const SurfaceParams& params) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); @@ -370,10 +408,18 @@ private: return {current_surface, current_surface->EmplaceOverview(params)}; } - std::optional> ReconstructSurface(std::vector& overlaps, - const SurfaceParams& params, - const GPUVAddr gpu_addr, - const u8* host_ptr) { + /** + * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface + * matches the candidate in some way, we got no guarantees here. We try to see if the overlaps + * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface + * for them, else we return nothing. + * @param overlaps, the overlapping surfaces registered in the cache. + * @param params, the parameters on the new surface. + * @param gpu_addr, the starting address of the new surface. 
+ **/ + std::optional> TryReconstructSurface(std::vector& overlaps, + const SurfaceParams& params, + const GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { return {}; } @@ -412,12 +458,30 @@ private: return {{new_surface, new_surface->GetMainView()}}; } + /** + * `GetSurface` gets the starting address and parameters of a candidate surface and tries + * to find a matching surface within the cache. This is done in 3 big steps. The first is to + * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. + * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from + * memory else we move to step 3. Step 3 consists of figuring out the relationship between the + * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many + * overlaps. If there are many, we just try to reconstruct a new surface out of them based on the + * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to + * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface + * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface. + * @param gpu_addr, the starting address of the candidate surface. + * @param params, the parameters on the candidate surface. + * @param preserve_contents, tells if the new surface should be loaded from memory or left blank. + **/ std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + // Step 1 + // Check Level 1 Cache for a fast structural match. If candidate surface + // matches at certain level we are pretty much done. 
if (l1_cache.count(cache_addr) > 0) { TSurface current_surface = l1_cache[cache_addr]; if (!current_surface->MatchesTopology(params)) { @@ -437,31 +501,43 @@ private: } } + // Step 2 + // Obtain all possible overlaps in the memory region const std::size_t candidate_size = params.GetGuestSizeInBytes(); auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + // If none are found, we are done. We just load the surface and create it. if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } + // Step 3 + // Now we need to figure out the relationship between the texture and its overlaps + // we do a topological test to ensure we can find some relationship. If it fails + // immediately recycle the texture for (auto surface : overlaps) { if (!surface->MatchesTopology(params)) { return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); } } + // Split cases between 1 overlap or many. if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; + // First check if the surface is within the overlap. If not, it means + // two things either the candidate surface is a supertexture of the overlap + // or they don't match in any known way. 
if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { if (current_surface->GetGpuAddr() == gpu_addr) { std::optional> view = - ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + TryReconstructSurface(overlaps, params, gpu_addr); if (view.has_value()) { return *view; } } return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } + // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = current_surface->EmplaceView(params, gpu_addr, candidate_size); if (view.has_value()) { @@ -472,15 +548,18 @@ private: } return {current_surface, *view}; } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } else { + // If there are many overlaps, odds are they are subtextures of the candidate + // surface. We try to construct a new surface based on the candidate parameters, + // using the overlaps. If a single overlap fails, this will fail. std::optional> view = - ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + TryReconstructSurface(overlaps, params, gpu_addr); if (view.has_value()) { return *view; } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } + // We failed all the tests, recycle the overlaps into a new texture. + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, -- cgit v1.2.3 From d267948a73d2364949660a24d07833ea05c9fcc8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 14 May 2019 00:55:32 -0400 Subject: texture_cache: loose TryReconstructSurface when accurate GPU is not on. Also corrects some asserts. 
--- src/video_core/engines/maxwell_dma.cpp | 2 +- src/video_core/texture_cache/surface_base.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 20 ++++++++++++++++++-- 3 files changed, 20 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 3a5dfef0c..afb9578d0 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -111,7 +111,7 @@ void MaxwellDMA::HandleCopy() { memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); } else { - ASSERT(regs.dst_params.BlockDepth() == 1); + ASSERT(regs.dst_params.BlockDepth() == 0); const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index ceff51043..d4aa2c54b 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -130,7 +130,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, return; } if (params.is_tiled) { - ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); + ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 04e9528b8..85c9160e0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -425,6 +425,7 @@ private: } bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); + u32 passed_tests = 0; for (auto surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); if 
(src_params.is_layered || src_params.num_levels > 1) { @@ -434,12 +435,12 @@ private: const std::size_t candidate_size = surface->GetSizeInBytes(); auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { - return {}; + continue; } const u32 layer{mipmap_layer->first}; const u32 mipmap{mipmap_layer->second}; if (new_surface->GetMipmapSize(mipmap) != candidate_size) { - return {}; + continue; } modified |= surface->IsModified(); // Now we got all the data set up @@ -448,8 +449,15 @@ private: const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, std::min(src_params.width, dst_width), std::min(src_params.height, dst_height), 1); + passed_tests++; ImageCopy(surface, new_surface, copy_params); } + if (passed_tests == 0) { + return {}; + // In Accurate GPU all test should pass, else we recycle + } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { + return {}; + } for (auto surface : overlaps) { Unregister(surface); } @@ -548,6 +556,14 @@ private: } return {current_surface, *view}; } + // The next case is unsafe, so if we r in accurate GPU, just skip it + if (Settings::values.use_accurate_gpu_emulation) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + } + // This is the case the texture is a part of the parent. + if (current_surface->MatchesSubTexture(params, gpu_addr)) { + return RebuildSurface(current_surface, params); + } } else { // If there are many overlaps, odds are they are subtextures of the candidate // surface. We try to construct a new surface based on the candidate parameters, -- cgit v1.2.3 From 175aa343ff1c9f931b266caf2d19b8df943dab0d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 18 May 2019 04:57:49 -0400 Subject: texture_cache: Fermi2D reform and implement View Mirage This also does some fixes on compressed textures reinterpret and on the Fermi2D engine in general. 
--- src/video_core/engines/fermi_2d.cpp | 10 ++-- src/video_core/engines/fermi_2d.h | 44 +++++++++++++----- src/video_core/rasterizer_interface.h | 3 +- .../renderer_opengl/gl_framebuffer_cache.cpp | 8 ++-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 +- src/video_core/renderer_opengl/gl_rasterizer.h | 3 +- .../renderer_opengl/gl_texture_cache.cpp | 54 ++++++++++------------ src/video_core/renderer_opengl/gl_texture_cache.h | 6 +-- src/video_core/texture_cache/surface_base.h | 15 ++++-- src/video_core/texture_cache/surface_params.h | 14 ++++++ src/video_core/texture_cache/texture_cache.h | 40 ++++++++++------ 11 files changed, 125 insertions(+), 77 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 55966eef1..d63b82838 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -4,7 +4,6 @@ #include "common/assert.h" #include "common/logging/log.h" -#include "common/math_util.h" #include "video_core/engines/fermi_2d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -35,7 +34,7 @@ void Fermi2D::HandleSurfaceCopy() { static_cast(regs.operation)); // TODO(Subv): Only raw copies are implemented. 
- ASSERT(regs.operation == Regs::Operation::SrcCopy); + ASSERT(regs.operation == Operation::SrcCopy); const u32 src_blit_x1{static_cast(regs.blit_src_x >> 32)}; const u32 src_blit_y1{static_cast(regs.blit_src_y >> 32)}; @@ -48,8 +47,13 @@ void Fermi2D::HandleSurfaceCopy() { const Common::Rectangle dst_rect{regs.blit_dst_x, regs.blit_dst_y, regs.blit_dst_x + regs.blit_dst_width, regs.blit_dst_y + regs.blit_dst_height}; + Config copy_config; + copy_config.operation = regs.operation; + copy_config.filter = regs.blit_control.filter; + copy_config.src_rect = src_rect; + copy_config.dst_rect = dst_rect; - if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { + if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { UNIMPLEMENTED(); } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 3d28afa91..0a4c7c5ad 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -9,6 +9,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/math_util.h" #include "video_core/gpu.h" namespace Tegra { @@ -38,6 +39,26 @@ public: /// Write the value to the register identified by method. 
void CallMethod(const GPU::MethodCall& method_call); + enum class Origin : u32 { + Center = 0, + Corner = 1, + }; + + enum class Filter : u32 { + PointSample = 0, // Nearest + Linear = 1, + }; + + enum class Operation : u32 { + SrcCopyAnd = 0, + ROPAnd = 1, + Blend = 2, + SrcCopy = 3, + ROP = 4, + SrcCopyPremult = 5, + BlendPremult = 6, + }; + struct Regs { static constexpr std::size_t NUM_REGS = 0x258; @@ -76,16 +97,6 @@ public: }; static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); - enum class Operation : u32 { - SrcCopyAnd = 0, - ROPAnd = 1, - Blend = 2, - SrcCopy = 3, - ROP = 4, - SrcCopyPremult = 5, - BlendPremult = 6, - }; - union { struct { INSERT_PADDING_WORDS(0x80); @@ -102,7 +113,11 @@ public: INSERT_PADDING_WORDS(0x177); - u32 blit_control; + union { + u32 raw; + BitField<0, 1, Origin> origin; + BitField<4, 1, Filter> filter; + } blit_control; INSERT_PADDING_WORDS(0x8); @@ -121,6 +136,13 @@ public: }; } regs{}; + struct Config { + Operation operation; + Filter filter; + Common::Rectangle src_rect; + Common::Rectangle dst_rect; + }; + private: VideoCore::RasterizerInterface& rasterizer; MemoryManager& memory_manager; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 3c18d3b1f..6007e8c2e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -52,8 +52,7 @@ public: /// Attempt to use a faster method to perform a surface copy virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) { + const Tegra::Engines::Fermi2D::Config& copy_config) { return false; } diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp index bb9f9b81f..7c926bd48 100644 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp +++ 
b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp @@ -37,7 +37,7 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK if (key.is_single_buffer) { if (key.color_attachments[0] != GL_NONE && key.colors[0]) { - key.colors[0]->Attach(key.color_attachments[0]); + key.colors[0]->Attach(key.color_attachments[0], GL_DRAW_FRAMEBUFFER); glDrawBuffer(key.color_attachments[0]); } else { glDrawBuffer(GL_NONE); @@ -45,14 +45,16 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK } else { for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { if (key.colors[index]) { - key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast(index)); + key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast(index), + GL_DRAW_FRAMEBUFFER); } } glDrawBuffers(key.colors_count, key.color_attachments.data()); } if (key.zeta) { - key.zeta->Attach(key.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT); + key.zeta->Attach(key.stencil_enable ? 
GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT, + GL_DRAW_FRAMEBUFFER); } return framebuffer; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4f7eeb22c..d613cb1dc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -696,10 +696,9 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) { + const Tegra::Engines::Fermi2D::Config& copy_config) { MICROPROFILE_SCOPE(OpenGL_Blits); - texture_cache.DoFermiCopy(src, dst, src_rect, dst_rect); + texture_cache.DoFermiCopy(src, dst, copy_config); return true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 64c27660f..33582ac42 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -67,8 +67,7 @@ public: void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) override; + const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; bool AccelerateDrawBatch(bool is_indexed) override; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 197c9f02c..9e9734f9e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -378,26 +378,26 @@ 
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p CachedSurfaceView::~CachedSurfaceView() = default; -void CachedSurfaceView::Attach(GLenum attachment) const { +void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { ASSERT(params.num_layers == 1 && params.num_levels == 1); const auto& owner_params = surface.GetSurfaceParams(); switch (owner_params.target) { case SurfaceTarget::Texture1D: - glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), params.base_level); + glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(), + params.base_level); break; case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), params.base_level); + glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(), + params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTexture(), - params.base_level, params.base_layer); + glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level, + params.base_layer); break; default: UNIMPLEMENTED(); @@ -460,11 +460,10 @@ void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, copy_params.depth); } -void TextureCacheOpenGL::ImageBlit(Surface src_surface, Surface dst_surface, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) { - const auto& src_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; +void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view, + const Tegra::Engines::Fermi2D::Config& copy_config) { + const auto& src_params{src_view->GetSurfaceParams()}; + const auto& dst_params{dst_view->GetSurfaceParams()}; 
OpenGLState prev_state{OpenGLState::GetCurState()}; SCOPE_EXIT({ prev_state.Apply(); }); @@ -476,51 +475,46 @@ void TextureCacheOpenGL::ImageBlit(Surface src_surface, Surface dst_surface, u32 buffers{}; - UNIMPLEMENTED_IF(src_params.target != SurfaceTarget::Texture2D); - UNIMPLEMENTED_IF(dst_params.target != SurfaceTarget::Texture2D); - - const GLuint src_texture{src_surface->GetTexture()}; - const GLuint dst_texture{dst_surface->GetTexture()}; + UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); + UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); if (src_params.type == SurfaceType::ColorTexture) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - src_texture, 0); + src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - dst_texture, 0); + dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); buffers = GL_COLOR_BUFFER_BIT; } else if (src_params.type == SurfaceType::Depth) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_texture, - 0); + src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_texture, - 0); + dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); buffers = GL_DEPTH_BUFFER_BIT; } else if (src_params.type == 
SurfaceType::DepthStencil) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - src_texture, 0); + src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - dst_texture, 0); + dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; } + const Common::Rectangle& src_rect = copy_config.src_rect; + const Common::Rectangle& dst_rect = copy_config.dst_rect; + const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; + glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, - buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); + is_linear ? 
GL_LINEAR : GL_NEAREST); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 0ba42dbab..0b333e9e3 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -73,7 +73,7 @@ public: ~CachedSurfaceView(); /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER - void Attach(GLenum attachment) const; + void Attach(GLenum attachment, GLenum target) const; GLuint GetTexture() { if (is_proxy) { @@ -138,8 +138,8 @@ protected: void ImageCopy(Surface src_surface, Surface dst_surface, const VideoCommon::CopyParams& copy_params) override; - void ImageBlit(Surface src_surface, Surface dst_surface, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) override; + void ImageBlit(View src_view, View dst_view, + const Tegra::Engines::Fermi2D::Config& copy_config) override; private: OGLFramebuffer src_framebuffer; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index cb7f22706..a3dd1c607 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -126,14 +126,19 @@ public: return MatchStructureResult::None; } // Tiled surface - if (std::tie(params.height, params.depth, params.block_width, params.block_height, - params.block_depth, params.tile_width_spacing, params.num_levels) == - std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, + if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, + params.tile_width_spacing, params.num_levels) == + std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, rhs.tile_width_spacing, rhs.num_levels)) { - if (params.width == rhs.width) { + if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { return MatchStructureResult::FullMatch; } - if 
(params.GetBlockAlignedWidth() == rhs.GetBlockAlignedWidth()) { + const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), + params.pixel_format, rhs.pixel_format); + const u32 hs = + SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); + const u32 w1 = params.GetBlockAlignedWidth(); + if (std::tie(w1, params.height) == std::tie(ws, hs)) { return MatchStructureResult::SemiMatch; } } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index b3082173f..13a08a60f 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -126,6 +126,20 @@ public: /// Returns the size of a layer in bytes in host memory for a given mipmap level. std::size_t GetHostLayerSize(u32 level) const; + static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, + VideoCore::Surface::PixelFormat pixel_format_to) { + const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); + const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); + return (width * bw2 + bw1 - 1) / bw1; + } + + static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, + VideoCore::Surface::PixelFormat pixel_format_to) { + const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); + const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); + return (height * bh2 + bh1 - 1) / bh1; + } + /// Returns the default block width. 
u32 GetDefaultBlockWidth() const { return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 85c9160e0..593ceeaf6 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -141,11 +141,6 @@ public: return {}; } - if (regs.color_mask[index].raw == 0) { - SetEmptyColorBuffer(index); - return {}; - } - const auto& config{regs.rt[index]}; const auto gpu_addr{config.Address()}; if (!gpu_addr) { @@ -192,11 +187,11 @@ public: void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) { - TSurface dst_surface = GetFermiSurface(dst_config); - ImageBlit(GetFermiSurface(src_config), dst_surface, src_rect, dst_rect); - dst_surface->MarkAsModified(true, Tick()); + const Tegra::Engines::Fermi2D::Config& copy_config) { + std::pair dst_surface = GetFermiSurface(dst_config); + std::pair src_surface = GetFermiSurface(src_config); + ImageBlit(src_surface.second, dst_surface.second, copy_config); + dst_surface.first->MarkAsModified(true, Tick()); } TSurface TryFindFramebufferSurface(const u8* host_ptr) { @@ -234,8 +229,8 @@ protected: virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, const CopyParams& copy_params) = 0; - virtual void ImageBlit(TSurface src, TSurface dst, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) = 0; + virtual void ImageBlit(TView src_view, TView dst_view, + const Tegra::Engines::Fermi2D::Config& copy_config) = 0; void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -282,10 +277,11 @@ protected: return new_surface; } - TSurface GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + std::pair GetFermiSurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config) { SurfaceParams 
params = SurfaceParams::CreateForFermiCopySurface(config); const GPUVAddr gpu_addr = config.Address(); - return GetSurface(gpu_addr, params, true).first; + return GetSurface(gpu_addr, params, true); } Core::System& system; @@ -551,7 +547,21 @@ private: if (view.has_value()) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { - LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); + // On a mirage view, we need to recreate the surface under this new view + // and then obtain a view again. + SurfaceParams new_params = current_surface->GetSurfaceParams(); + const u32 wh = SurfaceParams::ConvertWidth( + new_params.width, new_params.pixel_format, params.pixel_format); + const u32 hh = SurfaceParams::ConvertHeight( + new_params.height, new_params.pixel_format, params.pixel_format); + new_params.width = wh; + new_params.height = hh; + new_params.pixel_format = params.pixel_format; + std::pair pair = RebuildSurface(current_surface, new_params); + std::optional mirage_view = + pair.first->EmplaceView(params, gpu_addr, candidate_size); + if (mirage_view) + return {pair.first, *mirage_view}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } return {current_surface, *view}; -- cgit v1.2.3 From fcac55d5bff025fee822c2e7b0e06cdc178143dc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 07:56:53 -0400 Subject: texture_cache: Add checks for texture buffers. 
--- src/video_core/texture_cache/surface_base.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index a3dd1c607..210f27907 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -114,10 +114,23 @@ public: bool MatchesTopology(const SurfaceParams& rhs) const { const u32 src_bpp{params.GetBytesPerPixel()}; const u32 dst_bpp{rhs.GetBytesPerPixel()}; - return std::tie(src_bpp, params.is_tiled) == std::tie(dst_bpp, rhs.is_tiled); + const bool ib1 = params.IsBuffer(); + const bool ib2 = rhs.IsBuffer(); + return std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2); } MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { + // Buffer surface Check + if (params.IsBuffer()) { + const std::size_t wd1 = params.width*params.GetBytesPerPixel(); + const std::size_t wd2 = rhs.width*rhs.GetBytesPerPixel(); + if (wd1 == wd2) { + return MatchStructureResult::FullMatch; + } + return MatchStructureResult::None; + } + + // Linear Surface check if (!params.is_tiled) { if (std::tie(params.width, params.height, params.pitch) == std::tie(rhs.width, rhs.height, rhs.pitch)) { @@ -125,7 +138,8 @@ public: } return MatchStructureResult::None; } - // Tiled surface + + // Tiled Surface check if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, params.tile_width_spacing, params.num_levels) == std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, -- cgit v1.2.3 From e60ed2bb3e7e4ce63cc263019cce72a080c536ed Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 08:36:00 -0400 Subject: texture_cache: return null surface on invalid address --- src/video_core/texture_cache/texture_cache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 
'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 593ceeaf6..24c87127d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -483,6 +483,18 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + // Step 0: guarantee a valid surface + if (!cache_addr) { + // Return a null surface if it's invalid + SurfaceParams new_params = params; + new_params.width = 1; + new_params.height = 1; + new_params.depth = 1; + new_params.block_height = 0; + new_params.block_depth = 0; + return InitializeSurface(gpu_addr, new_params, false); + } + // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. -- cgit v1.2.3 From bdf9faab331cd79ca5c5e51c2369fc801e8cecea Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 11:24:20 -0400 Subject: texture_cache: Handle uncontinuous surfaces. 
--- src/video_core/memory_manager.cpp | 5 +-- src/video_core/texture_cache/surface_base.cpp | 47 ++++++++++++++++++++++----- src/video_core/texture_cache/surface_base.h | 34 ++++++++++++++++--- src/video_core/texture_cache/texture_cache.h | 17 ++++++---- 4 files changed, 82 insertions(+), 21 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 5d8d126c1..322453116 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -202,11 +202,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { } bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const { - const GPUVAddr end = start + size; + const std::size_t inner_size = size - 1; + const GPUVAddr end = start + inner_size; const auto host_ptr_start = reinterpret_cast(GetPointer(start)); const auto host_ptr_end = reinterpret_cast(GetPointer(end)); const auto range = static_cast(host_ptr_end - host_ptr_start); - return range == size; + return range == inner_size; } void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index d4aa2c54b..7e90960f7 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -68,12 +68,27 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf } void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, - std::vector& staging_buffer) { + StagingCache& staging_cache) { MICROPROFILE_SCOPE(GPU_Load_Texture); - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; - if (!host_ptr) { - return; + auto& staging_buffer = staging_cache.GetBuffer(0); + u8* host_ptr; + is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); + + // Handle continuouty + if (is_continuous) 
{ + // Use physical memory directly + host_ptr = memory_manager.GetPointer(gpu_addr); + if (!host_ptr) { + return; + } + } else { + // Use an extra temporal buffer + auto& tmp_buffer = staging_cache.GetBuffer(1); + tmp_buffer.resize(guest_memory_size); + host_ptr = tmp_buffer.data(); + memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); } + if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); @@ -123,12 +138,25 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, - std::vector& staging_buffer) { + StagingCache& staging_cache) { MICROPROFILE_SCOPE(GPU_Flush_Texture); - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; - if (!host_ptr) { - return; + auto& staging_buffer = staging_cache.GetBuffer(0); + u8* host_ptr; + + // Handle continuouty + if (is_continuous) { + // Use physical memory directly + host_ptr = memory_manager.GetPointer(gpu_addr); + if (!host_ptr) { + return; + } + } else { + // Use an extra temporal buffer + auto& tmp_buffer = staging_cache.GetBuffer(1); + tmp_buffer.resize(guest_memory_size); + host_ptr = tmp_buffer.data(); } + if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { @@ -154,6 +182,9 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, } } } + if (!is_continuous) { + memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); + } } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 210f27907..dacbc97c7 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -32,11 +32,28 @@ enum class MatchStructureResult : u32 { None = 2, }; 
+class StagingCache { +public: + StagingCache() {} + ~StagingCache() = default; + + std::vector& GetBuffer(std::size_t index) { + return staging_buffer[index]; + } + + void SetSize(std::size_t size) { + staging_buffer.resize(size); + } + +private: + std::vector> staging_buffer; +}; + class SurfaceBaseImpl { public: - void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); + void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); - void FlushBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); + void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); GPUVAddr GetGpuAddr() const { return gpu_addr; @@ -93,6 +110,14 @@ public: return mipmap_sizes[level]; } + void MarkAsContinuous(const bool is_continuous) { + this->is_continuous = is_continuous; + } + + bool IsContinuous() const { + return is_continuous; + } + bool IsLinear() const { return !params.is_tiled; } @@ -122,8 +147,8 @@ public: MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { // Buffer surface Check if (params.IsBuffer()) { - const std::size_t wd1 = params.width*params.GetBytesPerPixel(); - const std::size_t wd2 = rhs.width*rhs.GetBytesPerPixel(); + const std::size_t wd1 = params.width * params.GetBytesPerPixel(); + const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); if (wd1 == wd2) { return MatchStructureResult::FullMatch; } @@ -193,6 +218,7 @@ protected: CacheAddr cache_addr{}; CacheAddr cache_addr_end{}; VAddr cpu_addr{}; + bool is_continuous{}; std::vector mipmap_sizes; std::vector mipmap_offsets; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 24c87127d..ab4e094ea 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -220,6 +220,7 @@ protected: SetEmptyColorBuffer(i); } SetEmptyDepthBuffer(); + staging_cache.SetSize(2); } ~TextureCache() = default; 
@@ -244,6 +245,8 @@ protected: gpu_addr); return; } + bool continuouty = memory_manager->IsBlockContinuous(gpu_addr, size); + surface->MarkAsContinuous(continuouty); surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); RegisterInnerCache(surface); @@ -611,9 +614,9 @@ private: } void LoadSurface(const TSurface& surface) { - staging_buffer.resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(*memory_manager, staging_buffer); - surface->UploadTexture(staging_buffer); + staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); + surface->LoadBuffer(*memory_manager, staging_cache); + surface->UploadTexture(staging_cache.GetBuffer(0)); surface->MarkAsModified(false, Tick()); } @@ -621,9 +624,9 @@ private: if (!surface->IsModified()) { return; } - staging_buffer.resize(surface->GetHostSizeInBytes()); - surface->DownloadTexture(staging_buffer); - surface->FlushBuffer(*memory_manager, staging_buffer); + staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); + surface->DownloadTexture(staging_cache.GetBuffer(0)); + surface->FlushBuffer(*memory_manager, staging_cache); surface->MarkAsModified(false, Tick()); } @@ -723,7 +726,7 @@ private: render_targets; FramebufferTargetInfo depth_buffer; - std::vector staging_buffer; + StagingCache staging_cache; std::recursive_mutex mutex; }; -- cgit v1.2.3 From ea1525dab1bf7e9e56471b6d5fd50014bfeb4f96 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 12:48:28 -0400 Subject: Fix rebase errors --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 ++++++++-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 2 +- src/video_core/texture_cache/surface_params.h | 4 ++++ 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d613cb1dc..8fe115aec 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ 
b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -822,8 +822,14 @@ TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, c unit.sampler = sampler_cache.GetSampler(texture.tsc); if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) { - view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, - texture.tic.w_source); + if (view->GetSurfaceParams().IsBuffer()) { + // Record that this texture is a texture buffer. + texture_buffer_usage.set(bindpoint); + } else { + // Apply swizzle to textures that are not buffers. + view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, + texture.tic.w_source); + } state.texture_units[current_bindpoint].texture = view->GetTexture(); } else { // Can occur when texture addr is null or its memory is unmapped/invalid diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 51d9aae94..5ec911adc 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -291,7 +291,7 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn } ShaderDiskCacheDecompiled entry; - entry.code = std::move(code); + entry.code = std::string(reinterpret_cast(code.data()), code_size); u32 const_buffers_count{}; if (!LoadObjectFromPrecompiled(const_buffers_count)) { diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 13a08a60f..d9aa0b521 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -167,6 +167,10 @@ public: return VideoCore::Surface::GetFormatCompressionType(pixel_format); } + bool IsBuffer() const { + return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; + } + std::string TargetName() const; bool is_tiled; -- cgit v1.2.3 From 0966665fc225eee29b3ed87baefd74f79c19d307 Mon 
Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 22 May 2019 12:30:53 -0400 Subject: texture_cache: Only load on recycle with accurate GPU. Testing so far has proven this to be quite safe as texture memory read added a 2-5ms load to the current cache. --- src/video_core/texture_cache/texture_cache.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ab4e094ea..685bd28f4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -342,12 +342,13 @@ private: const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, const bool untopological) { + const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; for (auto surface : overlaps) { Unregister(surface); } switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { case RecycleStrategy::Ignore: { - return InitializeSurface(gpu_addr, params, preserve_contents); + return InitializeSurface(gpu_addr, params, do_load); } case RecycleStrategy::Flush: { std::sort(overlaps.begin(), overlaps.end(), @@ -361,7 +362,7 @@ private: } default: { UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); - return InitializeSurface(gpu_addr, params, preserve_contents); + return InitializeSurface(gpu_addr, params, do_load); } } } -- cgit v1.2.3 From 92513541529e90f4f79a1f2c3f8ccf5a199e4c20 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 24 May 2019 11:59:23 -0400 Subject: texture_cache: Correct copying between compressed and uncompressed formats --- src/video_core/texture_cache/surface_base.h | 9 ++++----- src/video_core/texture_cache/surface_params.h | 20 ++++++++++++++++++++ src/video_core/texture_cache/texture_cache.h | 8 +++----- 3 files changed, 27 insertions(+), 10 deletions(-) (limited to 'src/video_core/texture_cache') diff --git 
a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index dacbc97c7..77c2d6758 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -235,9 +235,8 @@ private: for (u32 layer = 0; layer < layers; layer++) { for (u32 level = 0; level < mipmaps; level++) { - const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; - const u32 height{ - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); + const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); result.emplace_back(width, height, layer, level); } } @@ -250,8 +249,8 @@ private: result.reserve(mipmaps); for (u32 level = 0; level < mipmaps; level++) { - const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; - const u32 height{std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); + const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; result.emplace_back(width, height, depth, level); } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index d9aa0b521..c3affd621 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -140,6 +140,26 @@ public: return (height * bh2 + bh1 - 1) / bh1; } + // this finds the maximun possible width between 2 2D layers of different formats + static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, + const u32 src_level, const u32 dst_level) { + const u32 bw1 = src_params.GetDefaultBlockWidth(); + const u32 bw2 = dst_params.GetDefaultBlockWidth(); + const u32 
t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; + const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; + return std::min(t_src_width, t_dst_width); + } + + // this finds the maximun possible height between 2 2D layers of different formats + static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, + const u32 src_level, const u32 dst_level) { + const u32 bh1 = src_params.GetDefaultBlockHeight(); + const u32 bh2 = dst_params.GetDefaultBlockHeight(); + const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; + const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; + return std::min(t_src_height, t_dst_height); + } + /// Returns the default block width. u32 GetDefaultBlockWidth() const { return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 685bd28f4..d2093e581 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -444,11 +444,9 @@ private: } modified |= surface->IsModified(); // Now we got all the data set up - const u32 dst_width{params.GetMipWidth(mipmap)}; - const u32 dst_height{params.GetMipHeight(mipmap)}; - const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, - std::min(src_params.width, dst_width), - std::min(src_params.height, dst_height), 1); + const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); + const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); + const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); passed_tests++; ImageCopy(surface, new_surface, copy_params); } -- cgit v1.2.3 From 228f516bb4426a41a4d1c1756751557f7a0eecda Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 24 May 2019 15:34:31 -0400 Subject: texture_cache 
uncompress-compress is untopological. This makes conflicts between non compress and compress textures to be auto recycled. It also limits the amount of mipmaps a texture can have if it goes above it's limit. --- .../renderer_opengl/gl_texture_cache.cpp | 10 ++++----- src/video_core/texture_cache/surface_base.h | 18 +++++++++++++-- src/video_core/texture_cache/surface_params.cpp | 4 ++++ src/video_core/texture_cache/surface_params.h | 14 ++++++++++++ src/video_core/texture_cache/texture_cache.h | 26 ++++++++++++---------- 5 files changed, 53 insertions(+), 19 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 9e9734f9e..e6f08a764 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -195,17 +195,17 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte switch (params.target) { case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.num_levels, internal_format, params.width); + glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); break; case SurfaceTarget::Texture2D: case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.num_levels, internal_format, params.width, + glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, params.height); break; case SurfaceTarget::Texture3D: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.num_levels, internal_format, params.width, + glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, params.height, params.depth); break; default: @@ -245,7 +245,7 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { // TODO(Rodrigo): Optimize alignment SCOPE_EXIT({ 
glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.num_levels; ++level) { + for (u32 level = 0; level < params.emulated_levels; ++level) { glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); @@ -264,7 +264,7 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { void CachedSurface::UploadTexture(std::vector& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Upload); SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.num_levels; ++level) { + for (u32 level = 0; level < params.emulated_levels; ++level) { UploadTextureMipmap(level, staging_buffer); } } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 77c2d6758..70b5258c9 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -32,6 +32,12 @@ enum class MatchStructureResult : u32 { None = 2, }; +enum class MatchTopologyResult : u32 { + FullMatch = 0, + CompressUnmatch = 1, + None = 2, +}; + class StagingCache { public: StagingCache() {} @@ -136,12 +142,20 @@ public: params.target == SurfaceTarget::Texture2D && params.num_levels == 1; } - bool MatchesTopology(const SurfaceParams& rhs) const { + MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const { const u32 src_bpp{params.GetBytesPerPixel()}; const u32 dst_bpp{rhs.GetBytesPerPixel()}; const bool ib1 = params.IsBuffer(); const bool ib2 = rhs.IsBuffer(); - return std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2); + if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { + const bool cb1 = params.IsCompressed(); + const bool cb2 = rhs.IsCompressed(); + if (cb1 == cb2) { + return MatchTopologyResult::FullMatch; + } + return 
MatchTopologyResult::CompressUnmatch; + } + return MatchTopologyResult::None; } MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index d9d157d02..77c09264a 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -85,6 +85,7 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, } params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); params.num_levels = config.tic.max_mip_level + 1; + params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); params.is_layered = params.IsLayered(); return params; } @@ -109,6 +110,7 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.depth = 1; params.pitch = 0; params.num_levels = 1; + params.emulated_levels = 1; params.is_layered = false; return params; } @@ -139,6 +141,7 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.depth = 1; params.target = SurfaceTarget::Texture2D; params.num_levels = 1; + params.emulated_levels = 1; params.is_layered = false; return params; } @@ -163,6 +166,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; + params.emulated_levels = 1; params.is_layered = params.IsLayered(); return params; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index c3affd621..5fde695b6 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -160,6 +160,19 @@ public: return std::min(t_src_height, t_dst_height); } + u32 MaxPossibleMipmap() const { + const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; + const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; + const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); + if (target 
!= VideoCore::Surface::SurfaceTarget::Texture3D) + return max_mipmap; + return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); + } + + bool IsCompressed() const { + return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; + } + /// Returns the default block width. u32 GetDefaultBlockWidth() const { return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); @@ -205,6 +218,7 @@ public: u32 depth; u32 pitch; u32 num_levels; + u32 emulated_levels; VideoCore::Surface::PixelFormat pixel_format; VideoCore::Surface::ComponentType component_type; VideoCore::Surface::SurfaceType type; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d2093e581..69ef7a2bd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -305,7 +305,7 @@ private: * due to topological reasons. **/ RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, const bool untopological) { + const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { if (Settings::values.use_accurate_gpu_emulation) { return RecycleStrategy::Flush; } @@ -320,8 +320,8 @@ private: } } // Untopological decision - if (untopological) { - return RecycleStrategy::Ignore; + if (untopological == MatchTopologyResult::CompressUnmatch) { + return RecycleStrategy::Flush; } return RecycleStrategy::Ignore; } @@ -341,7 +341,7 @@ private: std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, - const bool untopological) { + const MatchTopologyResult untopological) { const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; for (auto surface : overlaps) { Unregister(surface); @@ -502,9 +502,10 @@ private: // matches at certain level we are pretty much done. 
if (l1_cache.count(cache_addr) > 0) { TSurface current_surface = l1_cache[cache_addr]; - if (!current_surface->MatchesTopology(params)) { + auto topological_result = current_surface->MatchesTopology(params); + if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && @@ -534,8 +535,9 @@ private: // we do a topological test to ensure we can find some relationship. If it fails // inmediatly recycle the texture for (auto surface : overlaps) { - if (!surface->MatchesTopology(params)) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + auto topological_result = surface->MatchesTopology(params); + if (topological_result != MatchTopologyResult::FullMatch) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } } @@ -553,7 +555,7 @@ private: return *view; } } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = @@ -576,13 +578,13 @@ private: pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } return {current_surface, *view}; } // The next case is unsafe, so if we r in accurate GPU, just skip it if (Settings::values.use_accurate_gpu_emulation) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); 
+ return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { @@ -599,7 +601,7 @@ private: } } // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, -- cgit v1.2.3 From 60bf761afbb125abd324e4b798d18a1611b5777b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jun 2019 19:12:00 -0400 Subject: texture_cache: Implement Buffer Copy and detect Turing GPUs Image Copies --- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 ++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_resource_manager.cpp | 8 ++ .../renderer_opengl/gl_resource_manager.h | 3 + .../renderer_opengl/gl_texture_cache.cpp | 92 +++++++++++++++++++++- src/video_core/renderer_opengl/gl_texture_cache.h | 9 ++- src/video_core/texture_cache/texture_cache.h | 40 +++++++--- 8 files changed, 148 insertions(+), 12 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 65a88b06c..ad15ea54e 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -28,6 +28,7 @@ Device::Device() { max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = TestComponentIndexingBug(); + is_turing_plus = GLAD_GL_NV_mesh_shader; } Device::Device(std::nullptr_t) { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8c8c93760..1afe16779 100644 
--- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -34,6 +34,10 @@ public: return has_component_indexing_bug; } + bool IsTuringGPU() const { + return is_turing_plus; + } + private: static bool TestVariableAoffi(); static bool TestComponentIndexingBug(); @@ -43,6 +47,7 @@ private: u32 max_varyings{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; + bool is_turing_plus{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8fe115aec..97c55f2ec 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -82,7 +82,7 @@ struct DrawParameters { RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) - : texture_cache{system, *this}, shader_cache{*this, system, emu_window, device}, + : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, global_cache{*this}, system{system}, screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { OpenGLState::ApplyDefaultState(); diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 9f81c15cb..a1f91d677 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -148,6 +148,14 @@ void OGLBuffer::Release() { handle = 0; } +void OGLBuffer::MakePersistant(std::size_t buffer_size) { + if (handle == 0 || buffer_size == 0) + return; + + const GLbitfield flags = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT | GL_MAP_READ_BIT; + glNamedBufferStorage(handle, static_cast(buffer_size), nullptr, flags); +} + void OGLSync::Create() { if (handle != 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 310ee2bf3..f2873ef96 
100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -186,6 +186,9 @@ public: /// Deletes the internal OpenGL resource void Release(); + // Converts the buffer into a persistant storage buffer + void MakePersistant(std::size_t buffer_size); + GLuint handle = 0; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index e6f08a764..bddb15cb1 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "common/assert.h" +#include "common/bit_util.h" #include "common/common_types.h" #include "common/microprofile.h" #include "common/scope_exit.h" @@ -435,8 +436,10 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { } TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, - VideoCore::RasterizerInterface& rasterizer) + VideoCore::RasterizerInterface& rasterizer, + const Device& device) : TextureCacheBase{system, rasterizer} { + support_info.depth_color_image_copies = !device.IsTuringGPU(); src_framebuffer.Create(); dst_framebuffer.Create(); } @@ -449,6 +452,14 @@ Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, const VideoCommon::CopyParams& copy_params) { + if (!support_info.depth_color_image_copies) { + const auto& src_params = src_surface->GetSurfaceParams(); + const auto& dst_params = dst_surface->GetSurfaceParams(); + if (src_params.type != dst_params.type) { + // A fallback is needed + return; + } + } const auto src_handle = src_surface->GetTexture(); const auto src_target = src_surface->GetTarget(); const auto dst_handle = dst_surface->GetTexture(); @@ -517,4 +528,83 @@ void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view, is_linear ? 
GL_LINEAR : GL_NEAREST); } +void TextureCacheOpenGL::BufferCopy(Surface src_surface, Surface dst_surface) { + const auto& src_params = src_surface->GetSurfaceParams(); + const auto& dst_params = dst_surface->GetSurfaceParams(); + + const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); + const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); + + const std::size_t source_size = src_surface->GetHostSizeInBytes(); + const std::size_t dest_size = dst_surface->GetHostSizeInBytes(); + + const std::size_t buffer_size = std::max(source_size, dest_size); + + GLuint copy_pbo_handle = FetchPBO(buffer_size); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); + + if (source_format.compressed) { + glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast(source_size), + nullptr); + } else { + glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type, + static_cast(source_size), nullptr); + } + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); + + const GLsizei width = static_cast(dst_params.width); + const GLsizei height = static_cast(dst_params.height); + const GLsizei depth = static_cast(dst_params.depth); + if (dest_format.compressed) { + LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); + UNREACHABLE(); + } else { + switch (dst_params.target) { + case SurfaceTarget::Texture1D: + glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format, + dest_format.type, nullptr); + break; + case SurfaceTarget::Texture2D: + glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height, + dest_format.format, dest_format.type, nullptr); + break; + case SurfaceTarget::Texture3D: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubeArray: + glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, + dest_format.format, dest_format.type, 
nullptr); + break; + case SurfaceTarget::TextureCubemap: + glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, + dest_format.format, dest_format.type, nullptr); + break; + default: + LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", + static_cast(dst_params.target)); + UNREACHABLE(); + } + } + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + glTextureBarrier(); +} + +GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { + if (buffer_size < 0) { + UNREACHABLE(); + return 0; + } + const u32 l2 = Common::Log2Ceil64(static_cast(buffer_size)); + OGLBuffer& cp = copy_pbo_cache[l2]; + if (cp.handle == 0) { + const std::size_t ceil_size = 1ULL << l2; + cp.Create(); + cp.MakePersistant(ceil_size); + } + return cp.handle; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 0b333e9e3..f514f137c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/texture_cache/texture_cache.h" @@ -129,7 +130,8 @@ private: class TextureCacheOpenGL final : public TextureCacheBase { public: - explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer); + explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const Device& device); ~TextureCacheOpenGL(); protected: @@ -141,9 +143,14 @@ protected: void ImageBlit(View src_view, View dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) override; + void BufferCopy(Surface src_surface, Surface dst_surface) override; + private: + GLuint FetchPBO(std::size_t buffer_size); + OGLFramebuffer src_framebuffer; OGLFramebuffer dst_framebuffer; + 
std::unordered_map copy_pbo_cache; }; } // namespace OpenGL diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 69ef7a2bd..e0d0e1f70 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -214,6 +214,13 @@ public: } protected: + // This structure is used for communicating with the backend, on which behaviors + // it supports and what not, to avoid assuming certain things about hardware. + // The backend is RESPONSIBLE for filling this settings on creation. + struct Support { + bool depth_color_image_copies; + } support_info; + TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { @@ -233,6 +240,10 @@ protected: virtual void ImageBlit(TView src_view, TView dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) = 0; + // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture + // and reading it from a sepparate buffer. 
+ virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0; + void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -377,9 +388,14 @@ private: const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); - std::vector bricks = current_surface->BreakDown(params); - for (auto& brick : bricks) { - ImageCopy(current_surface, new_surface, brick); + const auto& cr_params = current_surface->GetSurfaceParams(); + if (!support_info.depth_color_image_copies && cr_params.type != params.type) { + BufferCopy(current_surface, new_surface); + } else { + std::vector bricks = current_surface->BreakDown(params); + for (auto& brick : bricks) { + ImageCopy(current_surface, new_surface, brick); + } } Unregister(current_surface); Register(new_surface); @@ -505,7 +521,8 @@ private: auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + topological_result); } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && @@ -537,7 +554,8 @@ private: for (auto surface : overlaps) { auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + topological_result); } } @@ -555,7 +573,8 @@ private: return *view; } } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } // Now we 
check if the candidate is a mipmap/layer of the overlap std::optional view = @@ -578,13 +597,15 @@ private: pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } return {current_surface, *view}; } // The next case is unsafe, so if we r in accurate GPU, just skip it if (Settings::values.use_accurate_gpu_emulation) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { @@ -601,7 +622,8 @@ private: } } // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, -- cgit v1.2.3 From 3809041c24a6ebea009923c14fb36aa1031bf188 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jun 2019 22:15:55 -0400 Subject: texture_cache: Optimize GetSurface and use references on functions that don't change a surface. 
--- src/video_core/renderer_opengl/gl_texture_cache.cpp | 6 +++--- src/video_core/renderer_opengl/gl_texture_cache.h | 6 +++--- src/video_core/texture_cache/texture_cache.h | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index bddb15cb1..71f6888c6 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -450,7 +450,7 @@ Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams return std::make_shared(gpu_addr, params); } -void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, +void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, const VideoCommon::CopyParams& copy_params) { if (!support_info.depth_color_image_copies) { const auto& src_params = src_surface->GetSurfaceParams(); @@ -471,7 +471,7 @@ void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, copy_params.depth); } -void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view, +void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) { const auto& src_params{src_view->GetSurfaceParams()}; const auto& dst_params{dst_view->GetSurfaceParams()}; @@ -528,7 +528,7 @@ void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view, is_linear ? 
GL_LINEAR : GL_NEAREST); } -void TextureCacheOpenGL::BufferCopy(Surface src_surface, Surface dst_surface) { +void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { const auto& src_params = src_surface->GetSurfaceParams(); const auto& dst_params = dst_surface->GetSurfaceParams(); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index f514f137c..dda3bf715 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -137,13 +137,13 @@ public: protected: Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; - void ImageCopy(Surface src_surface, Surface dst_surface, + void ImageCopy(Surface& src_surface, Surface& dst_surface, const VideoCommon::CopyParams& copy_params) override; - void ImageBlit(View src_view, View dst_view, + void ImageBlit(View& src_view, View& dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) override; - void BufferCopy(Surface src_surface, Surface dst_surface) override; + void BufferCopy(Surface& src_surface, Surface& dst_surface) override; private: GLuint FetchPBO(std::size_t buffer_size); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index e0d0e1f70..951168357 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -234,15 +234,15 @@ protected: virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; - virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, + virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, const CopyParams& copy_params) = 0; - virtual void ImageBlit(TView src_view, TView dst_view, + virtual void ImageBlit(TView& src_view, TView& dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) = 0; // Depending on the backend, a buffer copy can be slow as it means 
deoptimizing the texture // and reading it from a sepparate buffer. - virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0; + virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -516,8 +516,9 @@ private: // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. - if (l1_cache.count(cache_addr) > 0) { - TSurface current_surface = l1_cache[cache_addr]; + auto iter = l1_cache.find(cache_addr); + if (iter != l1_cache.end()) { + TSurface& current_surface = iter->second; auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; @@ -526,7 +527,6 @@ private: } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && - current_surface->GetGpuAddr() == gpu_addr && (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { if (s_result == MatchStructureResult::FullMatch) { -- cgit v1.2.3 From 9f755218a1359cbd004e6c287f5fead0897c1d11 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jun 2019 23:03:22 -0400 Subject: texture_cache: move some large methods to cpp files --- src/video_core/texture_cache/surface_base.cpp | 103 +++++++++++++++++++++++ src/video_core/texture_cache/surface_base.h | 106 ++---------------------- src/video_core/texture_cache/surface_params.cpp | 33 -------- src/video_core/texture_cache/surface_params.h | 32 +++++-- 4 files changed, 135 insertions(+), 139 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 7e90960f7..8c6edb04f 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ 
-42,6 +42,109 @@ SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) } } +MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const { + const u32 src_bpp{params.GetBytesPerPixel()}; + const u32 dst_bpp{rhs.GetBytesPerPixel()}; + const bool ib1 = params.IsBuffer(); + const bool ib2 = rhs.IsBuffer(); + if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { + const bool cb1 = params.IsCompressed(); + const bool cb2 = rhs.IsCompressed(); + if (cb1 == cb2) { + return MatchTopologyResult::FullMatch; + } + return MatchTopologyResult::CompressUnmatch; + } + return MatchTopologyResult::None; +} + +MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const { + // Buffer surface Check + if (params.IsBuffer()) { + const std::size_t wd1 = params.width * params.GetBytesPerPixel(); + const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); + if (wd1 == wd2) { + return MatchStructureResult::FullMatch; + } + return MatchStructureResult::None; + } + + // Linear Surface check + if (!params.is_tiled) { + if (std::tie(params.width, params.height, params.pitch) == + std::tie(rhs.width, rhs.height, rhs.pitch)) { + return MatchStructureResult::FullMatch; + } + return MatchStructureResult::None; + } + + // Tiled Surface check + if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, + params.tile_width_spacing, params.num_levels) == + std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, + rhs.tile_width_spacing, rhs.num_levels)) { + if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { + return MatchStructureResult::FullMatch; + } + const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format, + rhs.pixel_format); + const u32 hs = + SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); + const u32 w1 = params.GetBlockAlignedWidth(); + if 
(std::tie(w1, params.height) == std::tie(ws, hs)) { + return MatchStructureResult::SemiMatch; + } + } + return MatchStructureResult::None; +} + +std::optional> SurfaceBaseImpl::GetLayerMipmap( + const GPUVAddr candidate_gpu_addr) const { + if (candidate_gpu_addr < gpu_addr) { + return {}; + } + const auto relative_address{static_cast(candidate_gpu_addr - gpu_addr)}; + const auto layer{static_cast(relative_address / layer_size)}; + const GPUVAddr mipmap_address = relative_address - layer_size * layer; + const auto mipmap_it = + Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + if (mipmap_it == mipmap_offsets.end()) { + return {}; + } + const auto level{static_cast(std::distance(mipmap_offsets.begin(), mipmap_it))}; + return std::make_pair(layer, level); +} + +std::vector SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const { + const u32 layers{params.depth}; + const u32 mipmaps{params.num_levels}; + std::vector result; + result.reserve(static_cast(layers) * static_cast(mipmaps)); + + for (u32 layer = 0; layer < layers; layer++) { + for (u32 level = 0; level < mipmaps; level++) { + const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); + const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); + result.emplace_back(width, height, layer, level); + } + } + return result; +} + +std::vector SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { + const u32 mipmaps{params.num_levels}; + std::vector result; + result.reserve(mipmaps); + + for (u32 level = 0; level < mipmaps; level++) { + const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); + const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); + const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; + result.emplace_back(width, height, depth, level); + } + return result; +} + void 
SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, u32 level) { const u32 width{params.GetMipWidth(level)}; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 70b5258c9..9d19ecd5f 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -136,83 +136,17 @@ public: return params.target == target; } + MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const; + + MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const; + bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { return std::tie(gpu_addr, params.target, params.num_levels) == std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && params.target == SurfaceTarget::Texture2D && params.num_levels == 1; } - MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const { - const u32 src_bpp{params.GetBytesPerPixel()}; - const u32 dst_bpp{rhs.GetBytesPerPixel()}; - const bool ib1 = params.IsBuffer(); - const bool ib2 = rhs.IsBuffer(); - if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { - const bool cb1 = params.IsCompressed(); - const bool cb2 = rhs.IsCompressed(); - if (cb1 == cb2) { - return MatchTopologyResult::FullMatch; - } - return MatchTopologyResult::CompressUnmatch; - } - return MatchTopologyResult::None; - } - - MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { - // Buffer surface Check - if (params.IsBuffer()) { - const std::size_t wd1 = params.width * params.GetBytesPerPixel(); - const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); - if (wd1 == wd2) { - return MatchStructureResult::FullMatch; - } - return MatchStructureResult::None; - } - - // Linear Surface check - if (!params.is_tiled) { - if (std::tie(params.width, params.height, params.pitch) == - std::tie(rhs.width, rhs.height, rhs.pitch)) { - return 
MatchStructureResult::FullMatch; - } - return MatchStructureResult::None; - } - - // Tiled Surface check - if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, - params.tile_width_spacing, params.num_levels) == - std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing, rhs.num_levels)) { - if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { - return MatchStructureResult::FullMatch; - } - const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), - params.pixel_format, rhs.pixel_format); - const u32 hs = - SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); - const u32 w1 = params.GetBlockAlignedWidth(); - if (std::tie(w1, params.height) == std::tie(ws, hs)) { - return MatchStructureResult::SemiMatch; - } - } - return MatchStructureResult::None; - } - - std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const { - if (candidate_gpu_addr < gpu_addr) { - return {}; - } - const auto relative_address{static_cast(candidate_gpu_addr - gpu_addr)}; - const auto layer{static_cast(relative_address / layer_size)}; - const GPUVAddr mipmap_address = relative_address - layer_size * layer; - const auto mipmap_it = - Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); - if (mipmap_it == mipmap_offsets.end()) { - return {}; - } - const auto level{static_cast(std::distance(mipmap_offsets.begin(), mipmap_it))}; - return std::make_pair(layer, level); - } + std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const; std::vector BreakDown(const SurfaceParams& in_params) const { return params.is_layered ? 
BreakDownLayered(in_params) : BreakDownNonLayered(in_params); @@ -241,35 +175,9 @@ private: void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, u32 level); - std::vector BreakDownLayered(const SurfaceParams& in_params) const { - const u32 layers{params.depth}; - const u32 mipmaps{params.num_levels}; - std::vector result; - result.reserve(static_cast(layers) * static_cast(mipmaps)); - - for (u32 layer = 0; layer < layers; layer++) { - for (u32 level = 0; level < mipmaps; level++) { - const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); - const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - result.emplace_back(width, height, layer, level); - } - } - return result; - } - - std::vector BreakDownNonLayered(const SurfaceParams& in_params) const { - const u32 mipmaps{params.num_levels}; - std::vector result; - result.reserve(mipmaps); + std::vector BreakDownLayered(const SurfaceParams& in_params) const; - for (u32 level = 0; level < mipmaps; level++) { - const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); - const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; - result.emplace_back(width, height, depth, level); - } - return result; - } + std::vector BreakDownNonLayered(const SurfaceParams& in_params) const; }; template diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 77c09264a..60a7356bb 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -6,7 +6,6 @@ #include "common/alignment.h" #include "common/bit_util.h" -#include "common/cityhash.h" #include "core/core.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/surface.h" @@ -237,14 +236,6 @@ std::size_t 
SurfaceParams::GetConvertedMipmapOffset(u32 level) const { return offset; } -std::size_t SurfaceParams::GetGuestMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, false, false); -} - -std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); -} - std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { constexpr std::size_t rgb8_bpp = 4ULL; const std::size_t width_t = GetMipWidth(level); @@ -253,10 +244,6 @@ std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { return width_t * height_t * depth_t * rgb8_bpp; } -std::size_t SurfaceParams::GetGuestLayerSize() const { - return GetLayerSize(false, false); -} - std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { std::size_t size = 0; for (u32 level = 0; level < num_levels; ++level) { @@ -269,16 +256,6 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co return size; } -std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { - ASSERT(target != SurfaceTarget::Texture3D); - return GetInnerMipmapMemorySize(level, true, false); -} - -bool SurfaceParams::IsPixelFormatZeta() const { - return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && - pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; -} - std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const { const bool tiled{as_host_size ? false : is_tiled}; @@ -289,16 +266,6 @@ std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size GetMipBlockHeight(level), GetMipBlockDepth(level)); } -std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, - bool uncompressed) const { - return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 
1U : depth); -} - -std::size_t SurfaceParams::Hash() const { - return static_cast( - Common::CityHash64(reinterpret_cast(this), sizeof(*this))); -} - bool SurfaceParams::operator==(const SurfaceParams& rhs) const { return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, height, depth, pitch, num_levels, pixel_format, component_type, type, target) == diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 5fde695b6..c51e174cd 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -8,6 +8,7 @@ #include "common/alignment.h" #include "common/bit_util.h" +#include "common/cityhash.h" #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ -39,7 +40,10 @@ public: static SurfaceParams CreateForFermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& config); - std::size_t Hash() const; + std::size_t Hash() const { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof(*this))); + } bool operator==(const SurfaceParams& rhs) const; @@ -113,18 +117,27 @@ public: std::size_t GetConvertedMipmapOffset(u32 level) const; /// Returns the size in bytes in guest memory of a given mipmap level. - std::size_t GetGuestMipmapSize(u32 level) const; + std::size_t GetGuestMipmapSize(u32 level) const { + return GetInnerMipmapMemorySize(level, false, false); + } /// Returns the size in bytes in host memory (linear) of a given mipmap level. - std::size_t GetHostMipmapSize(u32 level) const; + std::size_t GetHostMipmapSize(u32 level) const { + return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); + } std::size_t GetConvertedMipmapSize(u32 level) const; /// Returns the size of a layer in bytes in guest memory. 
- std::size_t GetGuestLayerSize() const; + std::size_t GetGuestLayerSize() const { + return GetLayerSize(false, false); + } /// Returns the size of a layer in bytes in host memory for a given mipmap level. - std::size_t GetHostLayerSize(u32 level) const; + std::size_t GetHostLayerSize(u32 level) const { + ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); + return GetInnerMipmapMemorySize(level, true, false); + } static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, VideoCore::Surface::PixelFormat pixel_format_to) { @@ -194,7 +207,10 @@ public: } /// Returns true if the pixel format is a depth and/or stencil format. - bool IsPixelFormatZeta() const; + bool IsPixelFormatZeta() const { + return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && + pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; + } SurfaceCompression GetCompressionType() const { return VideoCore::Surface::GetFormatCompressionType(pixel_format); @@ -229,7 +245,9 @@ private: std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; /// Returns the size of all mipmap levels and aligns as needed. - std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const; + std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { + return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 
1U : depth); + } /// Returns the size of a layer std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; -- cgit v1.2.3 From 6f69f06873f666174d3c0306055bc5f097d64afc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 4 Jun 2019 12:12:40 -0400 Subject: texture_cache: Don't Image Copy if component types differ --- src/video_core/texture_cache/texture_cache.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 951168357..d2c27bcef 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -389,7 +389,8 @@ private: const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); const auto& cr_params = current_surface->GetSurfaceParams(); - if (!support_info.depth_color_image_copies && cr_params.type != params.type) { + if (cr_params.type != params.type && (!support_info.depth_color_image_copies || + cr_params.component_type != params.component_type)) { BufferCopy(current_surface, new_surface); } else { std::vector bricks = current_surface->BreakDown(params); -- cgit v1.2.3 From 561ce29c98bf822941061023e1f71a62175318ae Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 10 Jun 2019 10:39:59 -0400 Subject: texture_cache: correct mutex locks --- src/video_core/texture_cache/texture_cache.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d2c27bcef..503bd2b43 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -90,6 +90,7 @@ public: TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { + std::lock_guard 
lock{mutex}; const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { return {}; @@ -99,6 +100,7 @@ public: } TView GetDepthBufferSurface(bool preserve_contents) { + std::lock_guard lock{mutex}; auto& maxwell3d = system.GPU().Maxwell3D(); if (!maxwell3d.dirty_flags.zeta_buffer) { @@ -127,6 +129,7 @@ public: } TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { + std::lock_guard lock{mutex}; ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); auto& maxwell3d = system.GPU().Maxwell3D(); if (!maxwell3d.dirty_flags.color_buffer[index]) { @@ -188,6 +191,7 @@ public: void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, const Tegra::Engines::Fermi2D::Config& copy_config) { + std::lock_guard lock{mutex}; std::pair dst_surface = GetFermiSurface(dst_config); std::pair src_surface = GetFermiSurface(src_config); ImageBlit(src_surface.second, dst_surface.second, copy_config); @@ -245,8 +249,6 @@ protected: virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void Register(TSurface surface) { - std::lock_guard lock{mutex}; - const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); @@ -266,8 +268,6 @@ protected: } void Unregister(TSurface surface) { - std::lock_guard lock{mutex}; - if (guard_cache && surface->IsProtected()) { return; } -- cgit v1.2.3 From b01f9c8a7090fa056ca564593eabcebab946ef41 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 11 Jun 2019 07:20:27 -0400 Subject: texture_cache: eliminate accelerated depth->color/color->depth copies due to driver instability. 
--- src/video_core/renderer_opengl/gl_device.cpp | 1 - src/video_core/renderer_opengl/gl_device.h | 5 ----- src/video_core/renderer_opengl/gl_texture_cache.cpp | 13 +++++-------- src/video_core/texture_cache/texture_cache.h | 9 +-------- 4 files changed, 6 insertions(+), 22 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index ad15ea54e..65a88b06c 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -28,7 +28,6 @@ Device::Device() { max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = TestComponentIndexingBug(); - is_turing_plus = GLAD_GL_NV_mesh_shader; } Device::Device(std::nullptr_t) { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 1afe16779..8c8c93760 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -34,10 +34,6 @@ public: return has_component_indexing_bug; } - bool IsTuringGPU() const { - return is_turing_plus; - } - private: static bool TestVariableAoffi(); static bool TestComponentIndexingBug(); @@ -47,7 +43,6 @@ private: u32 max_varyings{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; - bool is_turing_plus{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 71f6888c6..7c1d14138 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -439,7 +439,6 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, const Device& device) : TextureCacheBase{system, rasterizer} { - support_info.depth_color_image_copies = !device.IsTuringGPU(); src_framebuffer.Create(); 
dst_framebuffer.Create(); } @@ -452,13 +451,11 @@ Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, const VideoCommon::CopyParams& copy_params) { - if (!support_info.depth_color_image_copies) { - const auto& src_params = src_surface->GetSurfaceParams(); - const auto& dst_params = dst_surface->GetSurfaceParams(); - if (src_params.type != dst_params.type) { - // A fallback is needed - return; - } + const auto& src_params = src_surface->GetSurfaceParams(); + const auto& dst_params = dst_surface->GetSurfaceParams(); + if (src_params.type != dst_params.type) { + // A fallback is needed + return; } const auto src_handle = src_surface->GetTexture(); const auto src_target = src_surface->GetTarget(); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 503bd2b43..c95b1b976 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -218,12 +218,6 @@ public: } protected: - // This structure is used for communicating with the backend, on which behaviors - // it supports and what not, to avoid assuming certain things about hardware. - // The backend is RESPONSIBLE for filling this settings on creation. 
- struct Support { - bool depth_color_image_copies; - } support_info; TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { @@ -389,8 +383,7 @@ private: const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); const auto& cr_params = current_surface->GetSurfaceParams(); - if (cr_params.type != params.type && (!support_info.depth_color_image_copies || - cr_params.component_type != params.component_type)) { + if (cr_params.type != params.type || (cr_params.component_type != params.component_type)) { BufferCopy(current_surface, new_surface); } else { std::vector bricks = current_surface->BreakDown(params); -- cgit v1.2.3 From a56f687793a0a24a368f0dafd5333daf8cbacecf Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 12 Jun 2019 09:32:26 -0400 Subject: texture_cache: correct texture buffer on surface params --- src/video_core/texture_cache/surface_params.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 60a7356bb..f789da2c4 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -74,10 +74,17 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. 
- params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); - params.width = config.tic.Width(); - params.height = config.tic.Height(); - params.depth = config.tic.Depth(); + if (!config.tic.IsBuffer()) { + params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); + params.width = config.tic.Width(); + params.height = config.tic.Height(); + params.depth = config.tic.Depth(); + } else { + params.target = SurfaceTarget::TextureBuffer; + params.width = config.tic.Width(); + params.height = 0; + params.depth = 0; + } if (params.target == SurfaceTarget::TextureCubemap || params.target == SurfaceTarget::TextureCubeArray) { params.depth *= 6; -- cgit v1.2.3 From 2d83553ea7ab2629e7e1a83cc3345c0115d69453 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 09:46:36 -0400 Subject: texture_cache: Implement siblings texture formats. --- src/video_core/texture_cache/surface_base.h | 4 +++ src/video_core/texture_cache/texture_cache.h | 39 +++++++++++++++++++--------- 2 files changed, 31 insertions(+), 12 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 9d19ecd5f..58265e9d3 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -132,6 +132,10 @@ public: return params.pixel_format == pixel_format; } + VideoCore::Surface::PixelFormat GetFormat() const { + return params.pixel_format; + } + bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { return params.target == target; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c95b1b976..022416706 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -43,6 +43,8 @@ class RasterizerInterface; namespace VideoCommon { +using VideoCore::Surface::PixelFormat; + using 
VideoCore::Surface::SurfaceTarget; using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; @@ -96,7 +98,7 @@ public: return {}; } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; - return GetSurface(gpu_addr, params, true).second; + return GetSurface(gpu_addr, params, true, false).second; } TView GetDepthBufferSurface(bool preserve_contents) { @@ -118,7 +120,7 @@ public: system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); + auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); if (depth_buffer.target) depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target = surface_view.first; @@ -152,7 +154,7 @@ public: } auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), - preserve_contents); + preserve_contents, true); if (render_targets[index].target) render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target = surface_view.first; @@ -226,6 +228,11 @@ protected: } SetEmptyDepthBuffer(); staging_cache.SetSize(2); + siblings_table[PixelFormat::Z16] = PixelFormat::R16F; + siblings_table[PixelFormat::Z32F] = PixelFormat::R32F; + siblings_table[PixelFormat::Z32FS8] = PixelFormat::RG32F; + siblings_table[PixelFormat::R16F] = PixelFormat::Z16; + siblings_table[PixelFormat::R32F] = PixelFormat::Z32F; } ~TextureCache() = default; @@ -289,7 +296,7 @@ protected: const Tegra::Engines::Fermi2D::Regs::Surface& config) { SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); const GPUVAddr gpu_addr = config.Address(); - return GetSurface(gpu_addr, params, true); + return GetSurface(gpu_addr, params, true, false); } Core::System& system; @@ -406,16 +413,22 @@ private: * @param params, the new 
surface params which we want to check. **/ std::pair ManageStructuralMatch(TSurface current_surface, - const SurfaceParams& params) { + const SurfaceParams& params, bool is_render) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + const bool matches_target = current_surface->MatchTarget(params.target); + auto match_check = ([&]() -> std::pair { + if (matches_target) { + return {current_surface, current_surface->GetMainView()}; + } + return {current_surface, current_surface->EmplaceOverview(params)}; + }); if (is_mirage) { + if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { + return match_check(); + } return RebuildSurface(current_surface, params); } - const bool matches_target = current_surface->MatchTarget(params.target); - if (matches_target) { - return {current_surface, current_surface->GetMainView()}; - } - return {current_surface, current_surface->EmplaceOverview(params)}; + return match_check(); } /** @@ -490,7 +503,7 @@ private: * @param preserve_contents, tells if the new surface should be loaded from meory or left blank. **/ std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, - bool preserve_contents) { + bool preserve_contents, bool is_render) { const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; @@ -524,7 +537,7 @@ private: (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { if (s_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params); + return ManageStructuralMatch(current_surface, params, is_render); } else { return RebuildSurface(current_surface, params); } @@ -724,6 +737,8 @@ private: // Guards the cache for protection conflicts. bool guard_cache{}; + std::unordered_map siblings_table; + // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. 
This fits better for the purpose of this cache as textures are normaly // large in size. -- cgit v1.2.3 From 3dd76432141a5cbc97bed15788984b37e44aa4a5 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 10:39:45 -0400 Subject: texture_cache: Use siblings textures on Rebuild and fix possible error on blitting --- .../renderer_opengl/gl_texture_cache.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 33 +++++++++++++++------- 2 files changed, 24 insertions(+), 11 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 7c1d14138..d30d04cd5 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -522,7 +522,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, - is_linear ? GL_LINEAR : GL_NEAREST); + is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? 
GL_LINEAR : GL_NEAREST); } void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 022416706..201c4d42e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -220,7 +220,6 @@ public: } protected: - TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { @@ -233,6 +232,7 @@ protected: siblings_table[PixelFormat::Z32FS8] = PixelFormat::RG32F; siblings_table[PixelFormat::R16F] = PixelFormat::Z16; siblings_table[PixelFormat::R32F] = PixelFormat::Z32F; + siblings_table[PixelFormat::RG32F] = PixelFormat::Z32FS8; } ~TextureCache() = default; @@ -385,15 +385,27 @@ private: * @param current_surface, the registered surface in the cache which we want to convert. * @param params, the new surface params which we'll use to recreate the surface. 
**/ - std::pair RebuildSurface(TSurface current_surface, - const SurfaceParams& params) { + std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params, + bool is_render) { const auto gpu_addr = current_surface->GetGpuAddr(); - TSurface new_surface = GetUncachedSurface(gpu_addr, params); const auto& cr_params = current_surface->GetSurfaceParams(); - if (cr_params.type != params.type || (cr_params.component_type != params.component_type)) { + TSurface new_surface; + if (cr_params.pixel_format != params.pixel_format && !is_render && + siblings_table[cr_params.pixel_format] == params.pixel_format) { + SurfaceParams new_params = params; + new_params.pixel_format = cr_params.pixel_format; + new_params.component_type = cr_params.component_type; + new_params.type = cr_params.type; + new_surface = GetUncachedSurface(gpu_addr, new_params); + } else { + new_surface = GetUncachedSurface(gpu_addr, params); + } + const auto& final_params = new_surface->GetSurfaceParams(); + if (cr_params.type != final_params.type || + (cr_params.component_type != final_params.component_type)) { BufferCopy(current_surface, new_surface); } else { - std::vector bricks = current_surface->BreakDown(params); + std::vector bricks = current_surface->BreakDown(final_params); for (auto& brick : bricks) { ImageCopy(current_surface, new_surface, brick); } @@ -426,7 +438,7 @@ private: if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { return match_check(); } - return RebuildSurface(current_surface, params); + return RebuildSurface(current_surface, params, is_render); } return match_check(); } @@ -539,7 +551,7 @@ private: if (s_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params, is_render); } else { - return RebuildSurface(current_surface, params); + return RebuildSurface(current_surface, params, is_render); } } } @@ -599,7 +611,8 @@ private: new_params.width = wh; new_params.height = hh; 
new_params.pixel_format = params.pixel_format; - std::pair pair = RebuildSurface(current_surface, new_params); + std::pair pair = + RebuildSurface(current_surface, new_params, is_render); std::optional mirage_view = pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) @@ -616,7 +629,7 @@ private: } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { - return RebuildSurface(current_surface, params); + return RebuildSurface(current_surface, params, is_render); } } else { // If there are many overlaps, odds are they are subtextures of the candidate -- cgit v1.2.3 From 7232a1ed16e46715c29d781fb143bdf799090bec Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 16:41:16 -0400 Subject: decoders: correct block calculation --- src/video_core/engines/engine_upload.cpp | 2 +- src/video_core/engines/engine_upload.h | 6 ++--- src/video_core/engines/fermi_2d.h | 6 ++--- src/video_core/engines/maxwell_dma.cpp | 2 +- src/video_core/engines/maxwell_dma.h | 4 ++-- src/video_core/texture_cache/texture_cache.h | 16 +++++++++++++ src/video_core/textures/decoders.cpp | 34 ++++++++++++---------------- 7 files changed, 41 insertions(+), 29 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index c776b9a56..d44ad0cd8 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -39,7 +39,7 @@ void State::ProcessData(const u32 data, const bool is_last_call) { UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0); UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0); const std::size_t dst_size = Tegra::Texture::CalculateSize( - true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); + true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0); tmp_buffer.resize(dst_size); memory_manager.ReadBlock(address, 
tmp_buffer.data(), dst_size); Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index cb294aec3..462da419e 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -39,15 +39,15 @@ struct Registers { } u32 BlockWidth() const { - return block_width; + return block_width.Value(); } u32 BlockHeight() const { - return block_height; + return block_height.Value(); } u32 BlockDepth() const { - return block_depth; + return block_depth.Value(); } } dest; }; diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 0a4c7c5ad..05421d185 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -84,15 +84,15 @@ public: } u32 BlockWidth() const { - return block_width; + return block_width.Value(); } u32 BlockHeight() const { - return block_height; + return block_height.Value(); } u32 BlockDepth() const { - return block_depth; + return block_depth.Value(); } }; static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index afb9578d0..3a5dfef0c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -111,7 +111,7 @@ void MaxwellDMA::HandleCopy() { memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); } else { - ASSERT(regs.dst_params.BlockDepth() == 0); + ASSERT(regs.dst_params.BlockDepth() == 1); const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 522fa97dc..17b015ca7 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -59,11 +59,11 @@ public: }; u32 BlockHeight() const { - return block_height; + return block_height.Value(); } u32 
BlockDepth() const { - return block_depth; + return block_depth.Value(); } }; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 201c4d42e..7a9b4c27d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -335,6 +335,9 @@ private: if (untopological == MatchTopologyResult::CompressUnmatch) { return RecycleStrategy::Flush; } + if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { + return RecycleStrategy::Flush; + } return RecycleStrategy::Ignore; } @@ -372,6 +375,11 @@ private: } return InitializeSurface(gpu_addr, params, preserve_contents); } + case RecycleStrategy::BufferCopy: { + auto new_surface = GetUncachedSurface(gpu_addr, params); + BufferCopy(overlaps[0], new_surface); + return {new_surface, new_surface->GetMainView()}; + } default: { UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); return InitializeSurface(gpu_addr, params, do_load); @@ -520,6 +528,10 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + if (gpu_addr == 0x00000001682F0000ULL) { + LOG_CRITICAL(HW_GPU, "Here's the texture!"); + } + // Step 0: guarantee a valid surface if (!cache_addr) { // Return a null surface if it's invalid @@ -566,6 +578,10 @@ private: return InitializeSurface(gpu_addr, params, preserve_contents); } + if (!params.is_tiled) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); + } // Step 3 // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. 
If it fails diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index f45fd175a..9a2f4198a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -256,19 +256,18 @@ std::vector UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, } void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { - const u32 block_height_size{1U << block_height}; + u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height_bit) { + const u32 block_height = 1U << block_height_bit; const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / gob_size_x}; for (u32 line = 0; line < subrect_height; ++line) { const u32 gob_address_y = - (line / (gob_size_y * block_height_size)) * gob_size * block_height_size * - image_width_in_gobs + - ((line % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; + (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + + ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; const auto& table = legacy_swizzle_table[line % gob_size_y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 gob_address = - gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height_size; + gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; u8* dest_addr = swizzled_data + swizzled_offset; @@ -279,19 +278,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 } void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, + u32 
bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height_bit, u32 offset_x, u32 offset_y) { - const u32 block_height_size{1U << block_height}; + const u32 block_height = 1U << block_height_bit; for (u32 line = 0; line < subrect_height; ++line) { const u32 y2 = line + offset_y; - const u32 gob_address_y = - (y2 / (gob_size_y * block_height_size)) * gob_size * block_height_size + - ((y2 % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; + const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + + ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size; const auto& table = legacy_swizzle_table[y2 % gob_size_y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 x2 = (x + offset_x) * bytes_per_pixel; - const u32 gob_address = - gob_address_y + (x2 / gob_size_x) * gob_size * block_height_size; + const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; u8* source_addr = swizzled_data + swizzled_offset; @@ -302,20 +299,19 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 } void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, - const u32 block_height, const std::size_t copy_size, const u8* source_data, + const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, u8* swizzle_data) { - const u32 block_height_size{1U << block_height}; + const u32 block_height = 1U << block_height_bit; const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; std::size_t count = 0; for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { const std::size_t gob_address_y = - (y / (gob_size_y * block_height_size)) * gob_size * block_height_size * - image_width_in_gobs + - ((y % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; + (y / (gob_size_y 
* block_height)) * gob_size * block_height * image_width_in_gobs + + ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; const auto& table = legacy_swizzle_table[y % gob_size_y]; for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { const std::size_t gob_address = - gob_address_y + (x / gob_size_x) * gob_size * block_height_size; + gob_address_y + (x / gob_size_x) * gob_size * block_height; const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; const u8* source_line = source_data + count; u8* dest_addr = swizzle_data + swizzled_offset; -- cgit v1.2.3 From 03d489dcf5dbe13dff1ff788c609f964dd24019c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Jun 2019 15:41:28 -0400 Subject: texture_cache: Initialize all siblings to invalid pixel format. --- src/video_core/texture_cache/texture_cache.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7a9b4c27d..8213f434d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -227,12 +227,18 @@ protected: } SetEmptyDepthBuffer(); staging_cache.SetSize(2); - siblings_table[PixelFormat::Z16] = PixelFormat::R16F; - siblings_table[PixelFormat::Z32F] = PixelFormat::R32F; - siblings_table[PixelFormat::Z32FS8] = PixelFormat::RG32F; - siblings_table[PixelFormat::R16F] = PixelFormat::Z16; - siblings_table[PixelFormat::R32F] = PixelFormat::Z32F; - siblings_table[PixelFormat::RG32F] = PixelFormat::Z32FS8; + auto make_siblings = ([this](PixelFormat a, PixelFormat b) { + siblings_table[a] = b; + siblings_table[b] = a; + }); + const u32 max_formats = static_cast(PixelFormat::Max); + siblings_table.reserve(max_formats); + for (u32 i = 0; i < max_formats; i++) { + siblings_table[static_cast(i)] = PixelFormat::Invalid; + } + make_siblings(PixelFormat::Z16, 
PixelFormat::R16F); + make_siblings(PixelFormat::Z32F, PixelFormat::R32F); + make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); } ~TextureCache() = default; @@ -766,6 +772,9 @@ private: // Guards the cache for protection conflicts. bool guard_cache{}; + // The siblings table is for formats that can inter exchange with one another + // without causing issues. This is only valid when a conflict occurs on a non + // rendering use. std::unordered_map siblings_table; // The internal Cache is different for the Texture Cache. It's based on buckets -- cgit v1.2.3 From 082740d34db0996a0af73d7680c57e1abb31c712 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Jun 2019 16:40:04 -0400 Subject: surface: Correct format S8Z24 --- src/video_core/engines/maxwell_dma.cpp | 2 +- src/video_core/surface.cpp | 4 ++-- src/video_core/texture_cache/texture_cache.h | 4 ---- src/video_core/textures/texture.h | 4 ++-- 4 files changed, 5 insertions(+), 9 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 3a5dfef0c..afb9578d0 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -111,7 +111,7 @@ void MaxwellDMA::HandleCopy() { memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); } else { - ASSERT(regs.dst_params.BlockDepth() == 1); + ASSERT(regs.dst_params.BlockDepth() == 0); const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 52a79e4a7..c50f6354d 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -308,8 +308,8 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, return PixelFormat::Z32F; case Tegra::Texture::TextureFormat::Z16: return PixelFormat::Z16; - case Tegra::Texture::TextureFormat::Z24S8: - return PixelFormat::Z24S8; + case Tegra::Texture::TextureFormat::S8Z24: + return 
PixelFormat::S8Z24; case Tegra::Texture::TextureFormat::ZF32_X24S8: return PixelFormat::Z32FS8; case Tegra::Texture::TextureFormat::DXT1: diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8213f434d..a9e61cba1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -534,10 +534,6 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; - if (gpu_addr == 0x00000001682F0000ULL) { - LOG_CRITICAL(HW_GPU, "Here's the texture!"); - } - // Step 0: guarantee a valid surface if (!cache_addr) { // Return a null surface if it's invalid diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index ddeed73d0..e3be018b9 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -52,9 +52,9 @@ enum class TextureFormat : u32 { DXT45 = 0x26, DXN1 = 0x27, DXN2 = 0x28, - Z24S8 = 0x29, + S8Z24 = 0x29, X8Z24 = 0x2a, - S8Z24 = 0x2b, + Z24S8 = 0x2b, X4V4Z24__COV4R4V = 0x2c, X4V4Z24__COV8R8V = 0x2d, V8Z24__COV4R12V = 0x2e, -- cgit v1.2.3 From fed773a86c96fc62f18181a1d3ba410b25c2edee Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Jun 2019 18:40:06 -0400 Subject: texture_cache: Implement Irregular Views in surfaces --- src/video_core/texture_cache/surface_base.cpp | 3 +++ src/video_core/texture_cache/surface_base.h | 25 +++++++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 8c6edb04f..97bf9ad7a 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -100,6 +100,9 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) std::optional> SurfaceBaseImpl::GetLayerMipmap( const GPUVAddr 
candidate_gpu_addr) const { + if (gpu_addr == candidate_gpu_addr) { + return {{0,0}}; + } if (candidate_gpu_addr < gpu_addr) { return {}; } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 58265e9d3..662221adc 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -238,6 +238,26 @@ public: return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } + std::optional EmplaceIrregularView(const SurfaceParams& view_params, + const GPUVAddr view_addr, + const std::size_t candidate_size, const u32 mipmap, + const u32 layer) { + const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)}; + if (!layer_mipmap) { + return {}; + } + const u32 end_layer{layer_mipmap->first}; + const u32 end_mipmap{layer_mipmap->second}; + if (layer != end_layer) { + if (mipmap == 0 && end_mipmap == 0) { + return GetView(ViewParams(view_params.target, layer, end_layer - layer + 1, 0, 1)); + } + return {}; + } else { + return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1)); + } + } + std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, const std::size_t candidate_size) { if (params.target == SurfaceTarget::Texture3D || @@ -252,10 +272,7 @@ public: const u32 layer{layer_mipmap->first}; const u32 mipmap{layer_mipmap->second}; if (GetMipmapSize(mipmap) != candidate_size) { - // TODO: The view may cover many mimaps, this case can still go on. - // This edge-case can be safely be ignored since it will just result in worse - // performance. 
- return {}; + return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer); } return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1)); } -- cgit v1.2.3 From 198a0395bb1b1d19de12560ac146add0705ed00e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 15 Jun 2019 11:08:11 -0400 Subject: texture_cache: Corrections to buffers and shadow formats use. --- src/video_core/texture_cache/surface_params.cpp | 44 +++++++++++++++++++------ 1 file changed, 34 insertions(+), 10 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index f789da2c4..290ba438d 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -16,11 +16,13 @@ namespace VideoCommon { using VideoCore::Surface::ComponentTypeFromDepthFormat; using VideoCore::Surface::ComponentTypeFromRenderTarget; using VideoCore::Surface::ComponentTypeFromTexture; +using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using VideoCore::Surface::PixelFormatFromTextureFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceTargetFromTextureType; +using VideoCore::Surface::SurfaceType; SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { switch (type) { @@ -71,6 +73,24 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.tile_width_spacing = params.is_tiled ? 
(1 << config.tic.tile_width_spacing.Value()) : 1; params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), params.srgb_conversion); + params.type = GetFormatType(params.pixel_format); + if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { + switch (params.pixel_format) { + case PixelFormat::R16F: { + params.pixel_format = PixelFormat::Z16; + break; + } + case PixelFormat::R32F: { + params.pixel_format = PixelFormat::Z32F; + break; + } + default: { + UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", + static_cast(params.pixel_format)); + } + } + params.type = GetFormatType(params.pixel_format); + } params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. @@ -79,20 +99,24 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.width = config.tic.Width(); params.height = config.tic.Height(); params.depth = config.tic.Depth(); + params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); + if (params.target == SurfaceTarget::TextureCubemap || + params.target == SurfaceTarget::TextureCubeArray) { + params.depth *= 6; + } + params.num_levels = config.tic.max_mip_level + 1; + params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); + params.is_layered = params.IsLayered(); } else { params.target = SurfaceTarget::TextureBuffer; params.width = config.tic.Width(); - params.height = 0; - params.depth = 0; + params.pitch = params.width * params.GetBytesPerPixel(); + params.height = 1; + params.depth = 1; + params.num_levels = 1; + params.emulated_levels = 1; + params.is_layered = false; } - if (params.target == SurfaceTarget::TextureCubemap || - params.target == SurfaceTarget::TextureCubeArray) { - params.depth *= 6; - } - params.pitch = params.is_tiled ? 
0 : config.tic.Pitch(); - params.num_levels = config.tic.max_mip_level + 1; - params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); - params.is_layered = params.IsLayered(); return params; } -- cgit v1.2.3 From d7587842eb404a52eb75a12816028f0706821dd0 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 15 Jun 2019 13:22:57 -0400 Subject: texture_cache: Implement texception detection and texture barriers. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 +++++-- src/video_core/texture_cache/texture_cache.h | 37 ++++++++++++++++++++---- 2 files changed, 40 insertions(+), 7 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 97c55f2ec..c9f3a35e6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -422,7 +422,7 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( } current_framebuffer_config_state = fb_config_state; - texture_cache.Guard(true); + texture_cache.GuardRenderTargets(true); View depth_surface{}; if (using_depth_fb) { @@ -500,7 +500,7 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; } - texture_cache.Guard(false); + texture_cache.GuardRenderTargets(false); current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey); SyncViewport(current_state); @@ -651,7 +651,9 @@ void RasterizerOpenGL::DrawArrays() { SetupVertexBuffer(vao); DrawParameters params = SetupDraw(); + texture_cache.GuardSamplers(true); SetupShaders(params.primitive_mode); + texture_cache.GuardSamplers(false); ConfigureFramebuffers(state); @@ -660,6 +662,10 @@ void RasterizerOpenGL::DrawArrays() { shader_program_manager->ApplyTo(state); state.Apply(); + if (texture_cache.TextureBarrier()) { + glTextureBarrier(); + } + params.DispatchDraw(); accelerate_draw = 
AccelDraw::Disabled; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a9e61cba1..353fa4e31 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -70,8 +70,12 @@ public: * `Guard` guarantees that rendertargets don't unregister themselves if the * collide. Protection is currently only done on 3D slices. **/ - void Guard(bool new_guard) { - guard_cache = new_guard; + void GuardRenderTargets(bool new_guard) { + guard_render_targets = new_guard; + } + + void GuardSamplers(bool new_guard) { + guard_samplers = new_guard; } void FlushRegion(CacheAddr addr, std::size_t size) { @@ -98,7 +102,25 @@ public: return {}; } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; - return GetSurface(gpu_addr, params, true, false).second; + auto pair = GetSurface(gpu_addr, params, true, false); + if (guard_samplers) { + if (sampled_textures_stack_pointer == sampled_textures_stack.size()) { + sampled_textures_stack.resize(sampled_textures_stack.size() * 2); + } + sampled_textures_stack[sampled_textures_stack_pointer] = pair.first; + sampled_textures_stack_pointer++; + } + return pair.second; + } + + bool TextureBarrier() { + bool must_do = false; + for (u32 i = 0; i < sampled_textures_stack_pointer; i++) { + must_do |= sampled_textures_stack[i]->IsRenderTarget(); + sampled_textures_stack[i] = nullptr; + } + sampled_textures_stack_pointer = 0; + return must_do; } TView GetDepthBufferSurface(bool preserve_contents) { @@ -239,6 +261,7 @@ protected: make_siblings(PixelFormat::Z16, PixelFormat::R16F); make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); + sampled_textures_stack.resize(64); } ~TextureCache() = default; @@ -275,7 +298,7 @@ protected: } void Unregister(TSurface surface) { - if (guard_cache && surface->IsProtected()) { + if (guard_render_targets && surface->IsProtected()) { return; } 
const GPUVAddr gpu_addr = surface->GetGpuAddr(); @@ -766,7 +789,8 @@ private: u64 ticks{}; // Guards the cache for protection conflicts. - bool guard_cache{}; + bool guard_render_targets{}; + bool guard_samplers{}; // The siblings table is for formats that can inter exchange with one another // without causing issues. This is only valid when a conflict occurs on a non @@ -792,6 +816,9 @@ private: render_targets; FramebufferTargetInfo depth_buffer; + std::vector sampled_textures_stack{}; + u32 sampled_textures_stack_pointer{}; + StagingCache staging_cache; std::recursive_mutex mutex; }; -- cgit v1.2.3 From 6acdae0e4c9d0c20f668cd86250b5d5b0dbd70c4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 17 Jun 2019 19:19:47 -0400 Subject: texture_cache: Correct format R16U as sibling --- src/video_core/texture_cache/surface_params.cpp | 1 + src/video_core/texture_cache/texture_cache.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 290ba438d..a670fc1a9 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -76,6 +76,7 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.type = GetFormatType(params.pixel_format); if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { switch (params.pixel_format) { + case PixelFormat::R16U: case PixelFormat::R16F: { params.pixel_format = PixelFormat::Z16; break; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 353fa4e31..78821503e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -258,7 +258,7 @@ protected: for (u32 i = 0; i < max_formats; i++) { siblings_table[static_cast(i)] = PixelFormat::Invalid; } - make_siblings(PixelFormat::Z16, 
PixelFormat::R16F); + make_siblings(PixelFormat::Z16, PixelFormat::R16U); make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); sampled_textures_stack.resize(64); -- cgit v1.2.3 From 97c8c9f49a3327f8f38dd460951071630c3e26fa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 20 Jun 2019 14:58:32 -0400 Subject: texture_cache: Eliminate linear textures fallthrough --- src/video_core/texture_cache/texture_cache.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 78821503e..d86ddeb76 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -603,10 +603,6 @@ private: return InitializeSurface(gpu_addr, params, preserve_contents); } - if (!params.is_tiled) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } // Step 3 // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. 
If it fails -- cgit v1.2.3 From d1812316e1b0f03af2ba10d4fe04be728e72725c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 20 Jun 2019 21:22:20 -0400 Subject: texture_cache: Style and Corrections --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/shader/node.h | 3 +- src/video_core/surface.h | 128 ++++++++++----------- src/video_core/texture_cache/surface_base.cpp | 2 +- src/video_core/texture_cache/surface_base.h | 3 +- src/video_core/texture_cache/texture_cache.h | 1 + src/video_core/textures/decoders.cpp | 7 +- 7 files changed, 75 insertions(+), 71 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 02e217b8c..718703091 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -104,7 +104,7 @@ constexpr std::tuple GetPrimitiveDescription(GLen std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { constexpr std::size_t start_offset = 10; constexpr u64 key = 0xE2400FFFFF07000FULL; - constexpr u64 mask =0xFFFFFFFFFF7FFFFFULL; + constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; std::size_t offset = start_offset; std::size_t size = start_offset * sizeof(u64); while (offset < program.size()) { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 2bf535928..0ac83fcf0 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -339,7 +339,8 @@ struct MetaImage { }; /// Parameters that modify an operation but are not part of any particular operand -using Meta = std::variant; +using Meta = + std::variant; /// Holds any kind of operation that can be done in the IR class OperationNode final { diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 959504cd3..ee9f00fa6 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -447,70 +447,70 @@ enum class 
SurfaceCompression : u8 { }; inline constexpr std::array compression_type_table = {{ - SurfaceCompression::None, // ABGR8U - SurfaceCompression::None, // ABGR8S - SurfaceCompression::None, // ABGR8UI - SurfaceCompression::None, // B5G6R5U - SurfaceCompression::None, // A2B10G10R10U - SurfaceCompression::None, // A1B5G5R5U - SurfaceCompression::None, // R8U - SurfaceCompression::None, // R8UI - SurfaceCompression::None, // RGBA16F - SurfaceCompression::None, // RGBA16U - SurfaceCompression::None, // RGBA16UI - SurfaceCompression::None, // R11FG11FB10F - SurfaceCompression::None, // RGBA32UI - SurfaceCompression::Compressed, // DXT1 - SurfaceCompression::Compressed, // DXT23 - SurfaceCompression::Compressed, // DXT45 - SurfaceCompression::Compressed, // DXN1 - SurfaceCompression::Compressed, // DXN2UNORM - SurfaceCompression::Compressed, // DXN2SNORM - SurfaceCompression::Compressed, // BC7U - SurfaceCompression::Compressed, // BC6H_UF16 - SurfaceCompression::Compressed, // BC6H_SF16 - SurfaceCompression::Converted, // ASTC_2D_4X4 - SurfaceCompression::None, // BGRA8 - SurfaceCompression::None, // RGBA32F - SurfaceCompression::None, // RG32F - SurfaceCompression::None, // R32F - SurfaceCompression::None, // R16F - SurfaceCompression::None, // R16U - SurfaceCompression::None, // R16S - SurfaceCompression::None, // R16UI - SurfaceCompression::None, // R16I - SurfaceCompression::None, // RG16 - SurfaceCompression::None, // RG16F - SurfaceCompression::None, // RG16UI - SurfaceCompression::None, // RG16I - SurfaceCompression::None, // RG16S - SurfaceCompression::None, // RGB32F - SurfaceCompression::None, // RGBA8_SRGB - SurfaceCompression::None, // RG8U - SurfaceCompression::None, // RG8S - SurfaceCompression::None, // RG32UI - SurfaceCompression::None, // R32UI - SurfaceCompression::Converted, // ASTC_2D_8X8 - SurfaceCompression::Converted, // ASTC_2D_8X5 - SurfaceCompression::Converted, // ASTC_2D_5X4 - SurfaceCompression::None, // BGRA8_SRGB - 
SurfaceCompression::Compressed, // DXT1_SRGB - SurfaceCompression::Compressed, // DXT23_SRGB - SurfaceCompression::Compressed, // DXT45_SRGB - SurfaceCompression::Compressed, // BC7U_SRGB - SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB - SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB - SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB - SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB - SurfaceCompression::Converted, // ASTC_2D_5X5 - SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB - SurfaceCompression::Converted, // ASTC_2D_10X8 - SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB - SurfaceCompression::None, // Z32F - SurfaceCompression::None, // Z16 - SurfaceCompression::None, // Z24S8 - SurfaceCompression::Rearranged, // S8Z24 - SurfaceCompression::None, // Z32FS8 + SurfaceCompression::None, // ABGR8U + SurfaceCompression::None, // ABGR8S + SurfaceCompression::None, // ABGR8UI + SurfaceCompression::None, // B5G6R5U + SurfaceCompression::None, // A2B10G10R10U + SurfaceCompression::None, // A1B5G5R5U + SurfaceCompression::None, // R8U + SurfaceCompression::None, // R8UI + SurfaceCompression::None, // RGBA16F + SurfaceCompression::None, // RGBA16U + SurfaceCompression::None, // RGBA16UI + SurfaceCompression::None, // R11FG11FB10F + SurfaceCompression::None, // RGBA32UI + SurfaceCompression::Compressed, // DXT1 + SurfaceCompression::Compressed, // DXT23 + SurfaceCompression::Compressed, // DXT45 + SurfaceCompression::Compressed, // DXN1 + SurfaceCompression::Compressed, // DXN2UNORM + SurfaceCompression::Compressed, // DXN2SNORM + SurfaceCompression::Compressed, // BC7U + SurfaceCompression::Compressed, // BC6H_UF16 + SurfaceCompression::Compressed, // BC6H_SF16 + SurfaceCompression::Converted, // ASTC_2D_4X4 + SurfaceCompression::None, // BGRA8 + SurfaceCompression::None, // RGBA32F + SurfaceCompression::None, // RG32F + SurfaceCompression::None, // R32F + SurfaceCompression::None, // R16F + SurfaceCompression::None, // R16U + 
SurfaceCompression::None, // R16S + SurfaceCompression::None, // R16UI + SurfaceCompression::None, // R16I + SurfaceCompression::None, // RG16 + SurfaceCompression::None, // RG16F + SurfaceCompression::None, // RG16UI + SurfaceCompression::None, // RG16I + SurfaceCompression::None, // RG16S + SurfaceCompression::None, // RGB32F + SurfaceCompression::None, // RGBA8_SRGB + SurfaceCompression::None, // RG8U + SurfaceCompression::None, // RG8S + SurfaceCompression::None, // RG32UI + SurfaceCompression::None, // R32UI + SurfaceCompression::Converted, // ASTC_2D_8X8 + SurfaceCompression::Converted, // ASTC_2D_8X5 + SurfaceCompression::Converted, // ASTC_2D_5X4 + SurfaceCompression::None, // BGRA8_SRGB + SurfaceCompression::Compressed, // DXT1_SRGB + SurfaceCompression::Compressed, // DXT23_SRGB + SurfaceCompression::Compressed, // DXT45_SRGB + SurfaceCompression::Compressed, // BC7U_SRGB + SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB + SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB + SurfaceCompression::Converted, // ASTC_2D_5X5 + SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB + SurfaceCompression::Converted, // ASTC_2D_10X8 + SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB + SurfaceCompression::None, // Z32F + SurfaceCompression::None, // Z16 + SurfaceCompression::None, // Z24S8 + SurfaceCompression::Rearranged, // S8Z24 + SurfaceCompression::None, // Z32FS8 }}; static constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 97bf9ad7a..051014c6a 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -101,7 +101,7 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) std::optional> SurfaceBaseImpl::GetLayerMipmap( const GPUVAddr 
candidate_gpu_addr) const { if (gpu_addr == candidate_gpu_addr) { - return {{0,0}}; + return {{0, 0}}; } if (candidate_gpu_addr < gpu_addr) { return {}; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 662221adc..252b18538 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -254,7 +254,8 @@ public: } return {}; } else { - return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1)); + return GetView( + ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1)); } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d86ddeb76..b720856f2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -18,6 +18,7 @@ #include "common/common_types.h" #include "common/math_util.h" #include "core/memory.h" +#include "core/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 9a2f4198a..7e8295944 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -256,7 +256,8 @@ std::vector UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, } void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height_bit) { + u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, + u32 block_height_bit) { const u32 block_height = 1U << block_height_bit; const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / gob_size_x}; @@ -278,8 +279,8 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 } void UnswizzleSubrect(u32 subrect_width, u32 
subrect_height, u32 dest_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height_bit, - u32 offset_x, u32 offset_y) { + u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, + u32 block_height_bit, u32 offset_x, u32 offset_y) { const u32 block_height = 1U << block_height_bit; for (u32 line = 0; line < subrect_height; ++line) { const u32 y2 = line + offset_y; -- cgit v1.2.3 From 0837290992e0873f270cd032d2d0e5b91b643267 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:08:52 -0300 Subject: texture_cache/surface_base: Address feedback --- src/video_core/texture_cache/surface_base.cpp | 4 ++++ src/video_core/texture_cache/surface_base.h | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 051014c6a..7a0fdb19b 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -19,6 +19,10 @@ using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; using VideoCore::Surface::SurfaceCompression; +StagingCache::StagingCache() = default; + +StagingCache::~StagingCache() = default; + SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) : params{params}, mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 252b18538..d632630ce 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -40,13 +40,17 @@ enum class MatchTopologyResult : u32 { class StagingCache { public: - StagingCache() {} - ~StagingCache() = default; + explicit StagingCache(); + ~StagingCache(); std::vector& GetBuffer(std::size_t index) { return 
staging_buffer[index]; } + const std::vector& GetBuffer(std::size_t index) const { + return staging_buffer[index]; + } + void SetSize(std::size_t size) { staging_buffer.resize(size); } -- cgit v1.2.3 From 34841a41c308aa1336f71fbce3006302452302d1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:09:56 -0300 Subject: texture_cache/surface_view: Address feedback --- src/video_core/texture_cache/surface_view.h | 1 - 1 file changed, 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index 1ef4509ce..04ca5639b 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -44,7 +44,6 @@ struct ViewParams { class ViewBase { public: ViewBase(const ViewParams& params) : params{params} {} - ~ViewBase() = default; const ViewParams& GetViewParams() const { return params; -- cgit v1.2.3 From 7565389700a5741460a118d1fcc5e14fccb4b413 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:15:57 -0300 Subject: texture_cache: Include "core/core.h" --- src/video_core/texture_cache/texture_cache.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b720856f2..a91b2a220 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -17,6 +17,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/math_util.h" +#include "core/core.h" #include "core/memory.h" #include "core/settings.h" #include "video_core/engines/fermi_2d.h" @@ -30,10 +31,6 @@ #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" -namespace Core { -class System; -} - namespace Tegra::Texture { struct FullTextureInfo; } -- cgit v1.2.3 From 
58c8a44e7aa18f768db39a36870d8b279257e1d8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 17:26:00 -0400 Subject: texture_cache: Query MemoryManager from the system --- src/video_core/gpu.cpp | 1 - src/video_core/rasterizer_interface.h | 2 -- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 ---- src/video_core/renderer_opengl/gl_rasterizer.h | 2 -- src/video_core/texture_cache/texture_cache.h | 18 +++++++----------- 5 files changed, 7 insertions(+), 20 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 619e06a0e..52706505b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -32,7 +32,6 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { auto& rasterizer{renderer.Rasterizer()}; memory_manager = std::make_unique(rasterizer); - rasterizer.InitMemoryMananger(*memory_manager); dma_pusher = std::make_unique(*this); maxwell_3d = std::make_unique(system, rasterizer, *memory_manager); fermi_2d = std::make_unique(rasterizer, *memory_manager); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 6007e8c2e..5ee4f8e8e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -28,8 +28,6 @@ class RasterizerInterface { public: virtual ~RasterizerInterface() {} - virtual void InitMemoryMananger(Tegra::MemoryManager& memory_manager) = 0; - /// Draw the current batch of vertex arrays virtual void DrawArrays() = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c9f3a35e6..f45a3c5ef 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -97,10 +97,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind 
RasterizerOpenGL::~RasterizerOpenGL() {} -void RasterizerOpenGL::InitMemoryMananger(Tegra::MemoryManager& memory_manager) { - texture_cache.InitMemoryMananger(memory_manager); -} - void RasterizerOpenGL::CheckExtensions() { if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { LOG_WARNING( diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 33582ac42..bf67e3a70 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -57,8 +57,6 @@ public: ScreenInfo& info); ~RasterizerOpenGL() override; - void InitMemoryMananger(Tegra::MemoryManager& memory_manager) override; - void DrawArrays() override; void Clear() override; void FlushAll() override; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a91b2a220..1516fcea3 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -52,10 +52,6 @@ class TextureCache { using IntervalType = typename IntervalMap::interval_type; public: - void InitMemoryMananger(Tegra::MemoryManager& memory_manager) { - this->memory_manager = &memory_manager; - } - void InvalidateRegion(CacheAddr addr, std::size_t size) { std::lock_guard lock{mutex}; @@ -278,15 +274,16 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); + const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); - const std::optional cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); + const std::optional cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); if (!cache_ptr || !cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); return; 
} - bool continuouty = memory_manager->IsBlockContinuous(gpu_addr, size); + bool continuouty = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); surface->MarkAsContinuous(continuouty); surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); @@ -552,7 +549,7 @@ private: std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { - const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; + const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; // Step 0: guarantee a valid surface @@ -693,7 +690,7 @@ private: void LoadSurface(const TSurface& surface) { staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(*memory_manager, staging_cache); + surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache); surface->UploadTexture(staging_cache.GetBuffer(0)); surface->MarkAsModified(false, Tick()); } @@ -704,7 +701,7 @@ private: } staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); surface->DownloadTexture(staging_cache.GetBuffer(0)); - surface->FlushBuffer(*memory_manager, staging_cache); + surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache); surface->MarkAsModified(false, Tick()); } @@ -778,7 +775,6 @@ private: }; VideoCore::RasterizerInterface& rasterizer; - Tegra::MemoryManager* memory_manager; u64 ticks{}; -- cgit v1.2.3 From fb234560b060e528d66a77815330766e5aa88594 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 17:42:50 -0400 Subject: copy_params: use constexpr for constructor --- src/video_core/texture_cache/copy_params.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h index 8cf010142..9c21a0649 100644 --- a/src/video_core/texture_cache/copy_params.h +++ 
b/src/video_core/texture_cache/copy_params.h @@ -9,13 +9,14 @@ namespace VideoCommon { struct CopyParams { - CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, u32 dest_z, - u32 source_level, u32 dest_level, u32 width, u32 height, u32 depth) + constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, + u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height, + u32 depth) : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x}, dest_y{dest_y}, dest_z{dest_z}, source_level{source_level}, dest_level{dest_level}, width{width}, height{height}, depth{depth} {} - CopyParams(u32 width, u32 height, u32 depth, u32 level) + constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level) : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level}, dest_level{level}, width{width}, height{height}, depth{depth} {} -- cgit v1.2.3 From c0abc7124d6ecd17f9da5ee5b3de9cb3dbf3ce1f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 18:03:25 -0400 Subject: surface_params: Corrections, asserts and documentation. 
--- src/video_core/texture_cache/surface_params.cpp | 4 +- src/video_core/texture_cache/surface_params.h | 97 ++++++++++++++----------- 2 files changed, 58 insertions(+), 43 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index a670fc1a9..340ed2ca0 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -269,11 +269,11 @@ std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const { } std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { - constexpr std::size_t rgb8_bpp = 4ULL; + constexpr std::size_t rgba8_bpp = 4ULL; const std::size_t width_t = GetMipWidth(level); const std::size_t height_t = GetMipHeight(level); const std::size_t depth_t = is_layered ? depth : GetMipDepth(level); - return width_t * height_t * depth_t * rgb8_bpp; + return width_t * height_t * depth_t * rgba8_bpp; } std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index c51e174cd..4dfb882f0 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -95,25 +95,21 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; + /// returns the best possible row/pitch alignment for the surface. u32 GetRowAlignment(u32 level) const { const u32 bpp = GetCompressionType() == SurfaceCompression::Converted ? 
4 : GetBytesPerPixel(); return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); } - // Helper used for out of class size calculations - static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, - const u32 block_depth) { - return Common::AlignBits(out_size, - Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); - } - /// Returns the offset in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapLevelOffset(u32 level) const; /// Returns the offset in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapLevelOffset(u32 level) const; + /// Returns the offset in bytes in host memory (linear) of a given mipmap level + // for a texture that is converted in host gpu. std::size_t GetConvertedMipmapOffset(u32 level) const; /// Returns the size in bytes in guest memory of a given mipmap level. @@ -139,40 +135,7 @@ public: return GetInnerMipmapMemorySize(level, true, false); } - static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, - VideoCore::Surface::PixelFormat pixel_format_to) { - const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); - const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); - return (width * bw2 + bw1 - 1) / bw1; - } - - static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, - VideoCore::Surface::PixelFormat pixel_format_to) { - const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); - const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); - return (height * bh2 + bh1 - 1) / bh1; - } - - // this finds the maximun possible width between 2 2D layers of different formats - static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, - const u32 src_level, const u32 dst_level) { - const u32 bw1 = src_params.GetDefaultBlockWidth(); - const u32 bw2 = dst_params.GetDefaultBlockWidth(); 
- const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; - const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; - return std::min(t_src_width, t_dst_width); - } - - // this finds the maximun possible height between 2 2D layers of different formats - static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, - const u32 src_level, const u32 dst_level) { - const u32 bh1 = src_params.GetDefaultBlockHeight(); - const u32 bh2 = dst_params.GetDefaultBlockHeight(); - const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; - const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; - return std::min(t_src_height, t_dst_height); - } - + /// Returns the max possible mipmap that the texture can have in host gpu u32 MaxPossibleMipmap() const { const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; @@ -182,6 +145,7 @@ public: return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); } + /// Returns if the guest surface is a compressed surface. bool IsCompressed() const { return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; } @@ -212,16 +176,67 @@ public: pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; } + /// Returns how the compression should be handled for this texture. Values + /// are: None(no compression), Compressed(texture is compressed), + /// Converted(texture is converted before upload/ after download), + /// Rearranged(texture is swizzled before upload/after download). SurfaceCompression GetCompressionType() const { return VideoCore::Surface::GetFormatCompressionType(pixel_format); } + /// Returns if the surface is a TextureBuffer type of surface. bool IsBuffer() const { return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; } + /// Returns the debug name of the texture for use in graphic debuggers.
std::string TargetName() const; + // Helper used for out of class size calculations + static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, + const u32 block_depth) { + return Common::AlignBits(out_size, + Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); + } + + /// Converts a width from a type of surface into another. This helps represent the + /// equivalent value between compressed/non-compressed textures. + static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, + VideoCore::Surface::PixelFormat pixel_format_to) { + const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); + const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); + return (width * bw2 + bw1 - 1) / bw1; + } + + /// Converts a height from a type of surface into another. This helps represent the + /// equivalent value between compressed/non-compressed textures. + static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, + VideoCore::Surface::PixelFormat pixel_format_to) { + const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); + const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); + return (height * bh2 + bh1 - 1) / bh1; + } + + // Finds the maximum possible width between 2 2D layers of different formats + static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, + const u32 src_level, const u32 dst_level) { + const u32 bw1 = src_params.GetDefaultBlockWidth(); + const u32 bw2 = dst_params.GetDefaultBlockWidth(); + const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; + const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; + return std::min(t_src_width, t_dst_width); + } + + // Finds the maximum possible height between 2 2D layers of different formats + static u32 IntersectHeight(const SurfaceParams& src_params, const
SurfaceParams& dst_params, + const u32 src_level, const u32 dst_level) { + const u32 bh1 = src_params.GetDefaultBlockHeight(); + const u32 bh2 = dst_params.GetDefaultBlockHeight(); + const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; + const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; + return std::min(t_src_height, t_dst_height); + } + bool is_tiled; bool srgb_conversion; bool is_layered; -- cgit v1.2.3 From 88bc39374fd7cffd2864229ae60bdab3aebb37ea Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 18:36:19 -0400 Subject: texture_cache: Corrections, documentation and asserts --- src/video_core/texture_cache/texture_cache.h | 84 ++++++++++++++-------------- 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1516fcea3..fb6ca41ff 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -60,10 +60,10 @@ public: } } - /** + /*** * `Guard` guarantees that rendertargets don't unregister themselves if the * collide. Protection is currently only done on 3D slices. 
- **/ + ***/ void GuardRenderTargets(bool new_guard) { guard_render_targets = new_guard; } @@ -191,19 +191,21 @@ public: } void SetEmptyDepthBuffer() { - if (depth_buffer.target != nullptr) { - depth_buffer.target->MarkAsRenderTarget(false); - depth_buffer.target = nullptr; - depth_buffer.view = nullptr; + if (depth_buffer.target == nullptr) { + return; } + depth_buffer.target->MarkAsRenderTarget(false); + depth_buffer.target = nullptr; + depth_buffer.view = nullptr; } void SetEmptyColorBuffer(std::size_t index) { - if (render_targets[index].target != nullptr) { - render_targets[index].target->MarkAsRenderTarget(false); - render_targets[index].target = nullptr; - render_targets[index].view = nullptr; + if (render_targets[index].target == nullptr) { + return; } + render_targets[index].target->MarkAsRenderTarget(false); + render_targets[index].target = nullptr; + render_targets[index].view = nullptr; } void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, @@ -283,8 +285,8 @@ protected: gpu_addr); return; } - bool continuouty = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); - surface->MarkAsContinuous(continuouty); + const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); + surface->MarkAsContinuous(continuous); surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); RegisterInnerCache(surface); @@ -381,8 +383,8 @@ private: const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, const MatchTopologyResult untopological) { - const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; - for (auto surface : overlaps) { + const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation; + for (auto& surface : overlaps) { Unregister(surface); } switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { @@ -394,7 +396,7 @@ private: [](const TSurface& a, const TSurface& b) -> bool { return a->GetModificationTick() 
< b->GetModificationTick(); }); - for (auto surface : overlaps) { + for (auto& surface : overlaps) { FlushSurface(surface); } return InitializeSurface(gpu_addr, params, preserve_contents); @@ -460,19 +462,19 @@ private: const SurfaceParams& params, bool is_render) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); const bool matches_target = current_surface->MatchTarget(params.target); - auto match_check = ([&]() -> std::pair { + const auto match_check = ([&]() -> std::pair { if (matches_target) { return {current_surface, current_surface->GetMainView()}; } return {current_surface, current_surface->EmplaceOverview(params)}; }); - if (is_mirage) { - if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { - return match_check(); - } - return RebuildSurface(current_surface, params, is_render); + if (!is_mirage) { + return match_check(); + } + if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { + return match_check(); } - return match_check(); + return RebuildSurface(current_surface, params, is_render); } /** @@ -493,7 +495,7 @@ private: bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); u32 passed_tests = 0; - for (auto surface : overlaps) { + for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); if (src_params.is_layered || src_params.num_levels > 1) { // We send this cases to recycle as they are more complex to handle @@ -504,8 +506,7 @@ private: if (!mipmap_layer) { continue; } - const u32 layer{mipmap_layer->first}; - const u32 mipmap{mipmap_layer->second}; + const auto [layer, mipmap] = *mipmap_layer; if (new_surface->GetMipmapSize(mipmap) != candidate_size) { continue; } @@ -519,7 +520,7 @@ private: } if (passed_tests == 0) { return {}; - // In Accurate GPU all test should pass, else we recycle + // In Accurate GPU all tests should pass, else we recycle } else if 
(Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { return {}; } @@ -548,7 +549,6 @@ private: **/ std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; @@ -570,17 +570,17 @@ private: auto iter = l1_cache.find(cache_addr); if (iter != l1_cache.end()) { TSurface& current_surface = iter->second; - auto topological_result = current_surface->MatchesTopology(params); + const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } - MatchStructureResult s_result = current_surface->MatchesStructure(params); - if (s_result != MatchStructureResult::None && + const auto struct_result = current_surface->MatchesStructure(params); + if (struct_result != MatchStructureResult::None && (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { - if (s_result == MatchStructureResult::FullMatch) { + if (struct_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params, is_render); } else { return RebuildSurface(current_surface, params, is_render); @@ -602,8 +602,8 @@ private: // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. 
If it fails // inmediatly recycle the texture - for (auto surface : overlaps) { - auto topological_result = surface->MatchesTopology(params); + for (const auto& surface : overlaps) { + const auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); @@ -620,7 +620,7 @@ private: if (current_surface->GetGpuAddr() == gpu_addr) { std::optional> view = TryReconstructSurface(overlaps, params, gpu_addr); - if (view.has_value()) { + if (view) { return *view; } } @@ -630,7 +630,7 @@ private: // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = current_surface->EmplaceView(params, gpu_addr, candidate_size); - if (view.has_value()) { + if (view) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { // On a mirage view, we need to recreate the surface under this new view @@ -669,7 +669,7 @@ private: // using the overlaps. If a single overlap fails, this will fail. 
std::optional> view = TryReconstructSurface(overlaps, params, gpu_addr); - if (view.has_value()) { + if (view) { return *view; } } @@ -738,16 +738,16 @@ private: std::vector surfaces; while (start <= end) { std::vector& list = registry[start]; - for (auto& s : list) { - if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { - s->MarkAsPicked(true); - surfaces.push_back(s); + for (auto& surface : list) { + if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { + surface->MarkAsPicked(true); + surfaces.push_back(surface); } } start++; } - for (auto& s : surfaces) { - s->MarkAsPicked(false); + for (auto& surface : surfaces) { + surface->MarkAsPicked(false); } return surfaces; } -- cgit v1.2.3 From 223ca8075399463e51d4afea1adb0c5b6fba8588 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 19:35:08 -0400 Subject: texture_cache: Correct variable naming. --- src/video_core/texture_cache/texture_cache.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fb6ca41ff..b5b0e91ef 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -225,9 +225,9 @@ public: } const CacheAddr page = cache_addr >> registry_page_bits; std::vector& list = registry[page]; - for (auto& s : list) { - if (s->GetCacheAddr() == cache_addr) { - return s; + for (auto& surface : list) { + if (surface->GetCacheAddr() == cache_addr) { + return surface; } } return nullptr; -- cgit v1.2.3 From 3f3c3ca5f96fd5742524703f20b531338fa2e5f7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 17:29:39 -0300 Subject: texture_cache: Address feedback --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 5 +---- src/video_core/renderer_opengl/gl_texture_cache.h | 15 --------------- src/video_core/texture_cache/surface_base.h | 2 +- 
src/video_core/texture_cache/texture_cache.h | 21 +++++++++++---------- 4 files changed, 13 insertions(+), 30 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 672f26f37..97014a676 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -236,10 +236,7 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param true); } -CachedSurface::~CachedSurface() { - views.clear(); - main_view = nullptr; -} +CachedSurface::~CachedSurface() = default; void CachedSurface::DownloadTexture(std::vector& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Download); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 8da81dba3..d4c6e9a30 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -89,21 +89,6 @@ public: return surface.GetSurfaceParams(); } - u32 GetWidth() const { - const auto& owner_params = GetSurfaceParams(); - return owner_params.GetMipWidth(params.base_level); - } - - u32 GetHeight() const { - const auto& owner_params = GetSurfaceParams(); - return owner_params.GetMipHeight(params.base_level); - } - - u32 GetDepth() const { - const auto& owner_params = GetSurfaceParams(); - return owner_params.GetMipDepth(params.base_level); - } - void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, Tegra::Texture::SwizzleSource z_source, diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index d632630ce..eaed6545d 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -294,8 +294,8 @@ protected: virtual TView CreateView(const ViewParams& view_key) = 0; - std::unordered_map 
views; TView main_view; + std::unordered_map views; private: TView GetView(const ViewParams& key) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b5b0e91ef..9436a5ff2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -79,10 +79,9 @@ public: if (surfaces.empty()) { return; } - std::sort(surfaces.begin(), surfaces.end(), - [](const TSurface& a, const TSurface& b) -> bool { - return a->GetModificationTick() < b->GetModificationTick(); - }); + std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { + return a->GetModificationTick() < b->GetModificationTick(); + }); for (const auto& surface : surfaces) { FlushSurface(surface); } @@ -181,13 +180,15 @@ public: } void MarkColorBufferInUse(std::size_t index) { - if (render_targets[index].target) - render_targets[index].target->MarkAsModified(true, Tick()); + if (auto& render_target = render_targets[index].target) { + render_target->MarkAsModified(true, Tick()); + } } void MarkDepthBufferInUse() { - if (depth_buffer.target) + if (depth_buffer.target) { depth_buffer.target->MarkAsModified(true, Tick()); + } } void SetEmptyDepthBuffer() { @@ -245,11 +246,11 @@ protected: } SetEmptyDepthBuffer(); staging_cache.SetSize(2); - auto make_siblings = ([this](PixelFormat a, PixelFormat b) { + const auto make_siblings = [this](PixelFormat a, PixelFormat b) { siblings_table[a] = b; siblings_table[b] = a; - }); - const u32 max_formats = static_cast(PixelFormat::Max); + }; + const auto max_formats = static_cast(PixelFormat::Max); siblings_table.reserve(max_formats); for (u32 i = 0; i < max_formats; i++) { siblings_table[static_cast(i)] = PixelFormat::Invalid; -- cgit v1.2.3 From dd9ace502bfd2239ceddad8c5c41baf0e10e2144 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 18:54:13 -0300 Subject: texture_cache: Use std::array for siblings_table --- 
src/video_core/texture_cache/texture_cache.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9436a5ff2..9fcf87744 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,6 +4,8 @@ #pragma once +#include +#include #include #include #include @@ -244,20 +246,19 @@ protected: for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { SetEmptyColorBuffer(i); } + SetEmptyDepthBuffer(); staging_cache.SetSize(2); + const auto make_siblings = [this](PixelFormat a, PixelFormat b) { - siblings_table[a] = b; - siblings_table[b] = a; + siblings_table[static_cast(a)] = b; + siblings_table[static_cast(b)] = a; }; - const auto max_formats = static_cast(PixelFormat::Max); - siblings_table.reserve(max_formats); - for (u32 i = 0; i < max_formats; i++) { - siblings_table[static_cast(i)] = PixelFormat::Invalid; - } + std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); make_siblings(PixelFormat::Z16, PixelFormat::R16U); make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); + sampled_textures_stack.resize(64); } @@ -426,7 +427,8 @@ private: const auto& cr_params = current_surface->GetSurfaceParams(); TSurface new_surface; if (cr_params.pixel_format != params.pixel_format && !is_render && - siblings_table[cr_params.pixel_format] == params.pixel_format) { + siblings_table[static_cast(cr_params.pixel_format)] == + params.pixel_format) { SurfaceParams new_params = params; new_params.pixel_format = cr_params.pixel_format; new_params.component_type = cr_params.component_type; @@ -472,7 +474,8 @@ private: if (!is_mirage) { return match_check(); } - if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { + if 
(!is_render && siblings_table[static_cast(current_surface->GetFormat())] == + params.pixel_format) { return match_check(); } return RebuildSurface(current_surface, params, is_render); @@ -786,7 +789,7 @@ private: // The siblings table is for formats that can inter exchange with one another // without causing issues. This is only valid when a conflict occurs on a non // rendering use. - std::unordered_map siblings_table; + std::array(PixelFormat::Max)> siblings_table; // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly -- cgit v1.2.3 From f6f1a8f26a302dc33df635625c490f0d65880059 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 19:52:37 -0300 Subject: texture_cache: Style changes --- src/video_core/surface.h | 18 +++++++++--------- src/video_core/texture_cache/surface_params.h | 9 +++------ src/video_core/texture_cache/texture_cache.h | 3 +-- 3 files changed, 13 insertions(+), 17 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/surface.h b/src/video_core/surface.h index bfdbc3b81..83f31c12c 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -439,11 +439,11 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { return GetFormatBpp(pixel_format) / CHAR_BIT; } -enum class SurfaceCompression : u8 { - None = 0, - Compressed = 1, - Converted = 2, - Rearranged = 3, +enum class SurfaceCompression { + None, // Not compressed + Compressed, // Texture is compressed + Converted, // Texture is converted before upload or after download + Rearranged, // Texture is swizzled before upload or after download }; constexpr std::array compression_type_table = {{ @@ -513,11 +513,11 @@ constexpr std::array compression_type_table SurfaceCompression::None, // Z32FS8 }}; -static constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { - if (format == PixelFormat::Invalid) 
+constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { + if (format == PixelFormat::Invalid) { return SurfaceCompression::None; - - ASSERT(static_cast(format) < compression_type_table.size()); + } + DEBUG_ASSERT(static_cast(format) < compression_type_table.size()); return compression_type_table[static_cast(format)]; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 4dfb882f0..358d6757c 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -95,7 +95,7 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; - /// returns the best possible row/pitch alignment for the surface. + /// Returns the best possible row/pitch alignment for the surface. u32 GetRowAlignment(u32 level) const { const u32 bpp = GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); @@ -109,7 +109,7 @@ public: std::size_t GetHostMipmapLevelOffset(u32 level) const; /// Returns the offset in bytes in host memory (linear) of a given mipmap level - // for a texture that is converted in host gpu. + /// for a texture that is converted in host gpu. std::size_t GetConvertedMipmapOffset(u32 level) const; /// Returns the size in bytes in guest memory of a given mipmap level. @@ -176,10 +176,7 @@ public: pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; } - /// Returns how the compression should be handled for this texture. Values - /// are: None(no compression), Compressed(texture is compressed), - /// Converted(texture is converted before upload/ after download), - /// Rearranged(texture is swizzled before upload/after download). + /// Returns how the compression should be handled for this texture. 
SurfaceCompression GetCompressionType() const { return VideoCore::Surface::GetFormatCompressionType(pixel_format); } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9fcf87744..3df3e17dd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -571,8 +571,7 @@ private: // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. - auto iter = l1_cache.find(cache_addr); - if (iter != l1_cache.end()) { + if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { -- cgit v1.2.3 From 8eae66907e043e6e26d78cfc4b5cde7ea93a4f77 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 20:10:31 -0300 Subject: texture_cache: Use std::vector reservation for sampled_textures --- src/video_core/texture_cache/texture_cache.h | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3df3e17dd..8edae3d97 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -97,25 +97,19 @@ public: return {}; } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; - auto pair = GetSurface(gpu_addr, params, true, false); + const auto [surface, view] = GetSurface(gpu_addr, params, true, false); if (guard_samplers) { - if (sampled_textures_stack_pointer == sampled_textures_stack.size()) { - sampled_textures_stack.resize(sampled_textures_stack.size() * 2); - } - sampled_textures_stack[sampled_textures_stack_pointer] = pair.first; - 
sampled_textures_stack_pointer++; + sampled_textures.push_back(surface); } - return pair.second; + return view; } bool TextureBarrier() { - bool must_do = false; - for (u32 i = 0; i < sampled_textures_stack_pointer; i++) { - must_do |= sampled_textures_stack[i]->IsRenderTarget(); - sampled_textures_stack[i] = nullptr; - } - sampled_textures_stack_pointer = 0; - return must_do; + const bool any_rt = + std::any_of(sampled_textures.begin(), sampled_textures.end(), + [](const auto& surface) { return surface->IsRenderTarget(); }); + sampled_textures.clear(); + return any_rt; } TView GetDepthBufferSurface(bool preserve_contents) { @@ -259,7 +253,7 @@ protected: make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); - sampled_textures_stack.resize(64); + sampled_textures.reserve(64); } ~TextureCache() = default; @@ -809,8 +803,7 @@ private: render_targets; FramebufferTargetInfo depth_buffer; - std::vector sampled_textures_stack{}; - u32 sampled_textures_stack_pointer{}; + std::vector sampled_textures; StagingCache staging_cache; std::recursive_mutex mutex; -- cgit v1.2.3 From 6e1db6b7038329a9716763c8bdf14cc5b578fec1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 20:47:46 -0300 Subject: texture_cache: Pack sibling queries inside a method --- src/video_core/texture_cache/texture_cache.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8edae3d97..c9e72531a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -421,8 +421,7 @@ private: const auto& cr_params = current_surface->GetSurfaceParams(); TSurface new_surface; if (cr_params.pixel_format != params.pixel_format && !is_render && - siblings_table[static_cast(cr_params.pixel_format)] == - params.pixel_format) { + 
GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { SurfaceParams new_params = params; new_params.pixel_format = cr_params.pixel_format; new_params.component_type = cr_params.component_type; @@ -459,17 +458,16 @@ private: const SurfaceParams& params, bool is_render) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); const bool matches_target = current_surface->MatchTarget(params.target); - const auto match_check = ([&]() -> std::pair { + const auto match_check = [&]() -> std::pair { if (matches_target) { return {current_surface, current_surface->GetMainView()}; } return {current_surface, current_surface->EmplaceOverview(params)}; - }); + }; if (!is_mirage) { return match_check(); } - if (!is_render && siblings_table[static_cast(current_surface->GetFormat())] == - params.pixel_format) { + if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { return match_check(); } return RebuildSurface(current_surface, params, is_render); @@ -766,6 +764,10 @@ private: return {}; } + constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { + return siblings_table[static_cast(format)]; + } + struct FramebufferTargetInfo { TSurface target; TView view; -- cgit v1.2.3 From 30b176f92b67ec7a9b1ce08cf89d50abd125f8a8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 4 Jul 2019 19:38:19 -0400 Subject: texture_cache: Correct Texture Buffer Uploading --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 17 +++++++++++++++-- src/video_core/renderer_opengl/gl_texture_cache.h | 1 + src/video_core/texture_cache/surface_params.cpp | 2 ++ 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 97014a676..780526b66 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ 
-141,6 +141,8 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType compon GLenum GetTextureTarget(const SurfaceTarget& target) { switch (target) { + case SurfaceTarget::TextureBuffer: + return GL_TEXTURE_BUFFER; case SurfaceTarget::Texture1D: return GL_TEXTURE_1D; case SurfaceTarget::Texture2D: @@ -191,7 +193,8 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { } } -OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format) { +OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, + OGLBuffer& texture_buffer) { OGLTexture texture; texture.Create(target); @@ -199,6 +202,11 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte case SurfaceTarget::Texture1D: glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); break; + case SurfaceTarget::TextureBuffer: + texture_buffer.Create(); + glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), + nullptr, GL_DYNAMIC_STORAGE_BIT); + glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); case SurfaceTarget::Texture2D: case SurfaceTarget::TextureCubemap: glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, @@ -229,7 +237,7 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param type = tuple.type; is_compressed = tuple.compressed; target = GetTextureTarget(params.target); - texture = CreateTexture(params, target, internal_format); + texture = CreateTexture(params, target, internal_format, texture_buffer); DecorateSurfaceName(); main_view = CreateViewInner( ViewParams(params.target, 0, params.is_layered ? 
params.depth : 1, 0, params.num_levels), @@ -316,6 +324,11 @@ void CachedSurface::UploadTextureMipmap(u32 level, std::vector& staging_buff glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, buffer); break; + case SurfaceTarget::TextureBuffer: + ASSERT(level == 0); + glNamedBufferSubData(texture_buffer.handle, 0, + params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer); + break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2D: glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level), diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index d4c6e9a30..e7cc66fbb 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -67,6 +67,7 @@ private: u32 view_count{}; OGLTexture texture; + OGLBuffer texture_buffer; }; class CachedSurfaceView final : public VideoCommon::ViewBase { diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 340ed2ca0..9c56e2b4f 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -310,6 +310,8 @@ std::string SurfaceParams::TargetName() const { switch (target) { case SurfaceTarget::Texture1D: return "1D"; + case SurfaceTarget::TextureBuffer: + return "TexBuffer"; case SurfaceTarget::Texture2D: return "2D"; case SurfaceTarget::Texture3D: -- cgit v1.2.3 From 3b9d89839dc62e9e63a3cbe9636cf85276babdfb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 4 Jul 2019 21:10:59 -0400 Subject: texture_cache: Address Feedback --- src/common/CMakeLists.txt | 1 + src/common/binary_find.h | 21 +++++++++++++++++++++ src/common/common_funcs.h | 10 ---------- src/video_core/renderer_opengl/gl_shader_cache.cpp | 6 ++++-- src/video_core/renderer_opengl/gl_texture_cache.cpp | 6 +++--- src/video_core/renderer_opengl/gl_texture_cache.h | 
9 ++++----- src/video_core/texture_cache/surface_base.h | 4 ++-- 7 files changed, 35 insertions(+), 22 deletions(-) create mode 100644 src/common/binary_find.h (limited to 'src/video_core/texture_cache') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 8ae05137b..2554add28 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -75,6 +75,7 @@ add_library(common STATIC assert.h detached_tasks.cpp detached_tasks.h + binary_find.h bit_field.h bit_util.h cityhash.cpp diff --git a/src/common/binary_find.h b/src/common/binary_find.h new file mode 100644 index 000000000..5cc523bf9 --- /dev/null +++ b/src/common/binary_find.h @@ -0,0 +1,21 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +namespace Common { + +template > +ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { + // Note: BOTH type T and the type after ForwardIt is dereferenced + // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. + // This is stricter than lower_bound requirement (see above) + + first = std::lower_bound(first, last, value, comp); + return first != last && !comp(value, *first) ? first : last; +} + +} // namespace Common diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 00a5698f3..04ecac959 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -61,14 +61,4 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) { return a | b << 8 | c << 16 | d << 24; } -template > -ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { - // Note: BOTH type T and the type after ForwardIt is dereferenced - // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. 
- // This is stricter than lower_bound requirement (see above) - - first = std::lower_bound(first, last, value, comp); - return first != last && !comp(value, *first) ? first : last; -} - } // namespace Common diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 718703091..1bd182d98 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -103,14 +103,16 @@ constexpr std::tuple GetPrimitiveDescription(GLen /// Calculates the size of a program stream std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { constexpr std::size_t start_offset = 10; - constexpr u64 key = 0xE2400FFFFF07000FULL; + // This is the encoded version of BRA that jumps to itself. All Nvidia + // shaders end with one. + constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; std::size_t offset = start_offset; std::size_t size = start_offset * sizeof(u64); while (offset < program.size()) { const u64 instruction = program[offset]; if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & mask) == key) { + if ((instruction & mask) == self_jumping_branch) { // End on Maxwell's "nop" instruction break; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 780526b66..08ae1a429 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -267,7 +267,7 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { } } -void CachedSurface::UploadTexture(std::vector& staging_buffer) { +void CachedSurface::UploadTexture(const std::vector& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Upload); SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.emulated_levels; ++level) { @@ -275,7 +275,7 @@ void 
CachedSurface::UploadTexture(std::vector<u8>& staging_buffer) { } } -void CachedSurface::UploadTextureMipmap(u32 level, std::vector<u8>& staging_buffer) { +void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); @@ -284,7 +284,7 @@ void CachedSurface::UploadTextureMipmap(u32 level, std::vector<u8>& staging_buff const std::size_t mip_offset = compression_type == SurfaceCompression::Converted ? params.GetConvertedMipmapOffset(level) : params.GetHostMipmapLevelOffset(level); - u8* buffer{staging_buffer.data() + mip_offset}; + const u8* buffer{staging_buffer.data() + mip_offset}; if (is_compressed) { const auto image_size{static_cast(params.GetHostMipmapSize(level))}; switch (params.target) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index e7cc66fbb..ff6ab6988 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -39,7 +39,7 @@ public: explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params); ~CachedSurface(); - void UploadTexture(std::vector<u8>& staging_buffer) override; + void UploadTexture(const std::vector<u8>& staging_buffer) override; void DownloadTexture(std::vector<u8>& staging_buffer) override; GLenum GetTarget() const { @@ -57,7 +57,7 @@ protected: View CreateViewInner(const ViewParams& view_key, bool is_proxy); private: - void UploadTextureMipmap(u32 level, std::vector<u8>& staging_buffer); + void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer); GLenum internal_format{}; GLenum format{}; @@ -72,14 +72,13 @@ private: class CachedSurfaceView final : public VideoCommon::ViewBase { public: - explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, - const bool is_proxy); + explicit 
CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); ~CachedSurfaceView(); /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER void Attach(GLenum attachment, GLenum target) const; - GLuint GetTexture() { + GLuint GetTexture() const { if (is_proxy) { return surface.GetTexture(); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index eaed6545d..8ba386a8a 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -9,7 +9,7 @@ #include #include "common/assert.h" -#include "common/common_funcs.h" +#include "common/binary_find.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/morton.h" @@ -191,7 +191,7 @@ private: template class SurfaceBase : public SurfaceBaseImpl { public: - virtual void UploadTexture(std::vector<u8>& staging_buffer) = 0; + virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0; virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0; -- cgit v1.2.3