From 5665d055476fa793192523c3cb6fe06369d58674 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 22:48:41 -0400 Subject: astc_decoder: Optimize the use EncodingData This buffer was a list of EncodingData structures sorted by their bit length, with some duplication from the cpu decoder implementation. We can take advantage of its sorted property to optimize its usage in the shader. Thanks to wwylele for the optimization idea. --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 42 +++++----------------- 1 file changed, 9 insertions(+), 33 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 561cf5e11..328813a57 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -30,16 +30,13 @@ namespace Vulkan { using Tegra::Texture::SWIZZLE_TABLE; -using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES; -using namespace Tegra::Texture::ASTC; namespace { constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; -constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; -constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; -constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; -constexpr size_t ASTC_NUM_BINDINGS = 4; +constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 1; +constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 2; +constexpr size_t ASTC_NUM_BINDINGS = 3; template inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ @@ -75,7 +72,7 @@ constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ .score = 2, }; -constexpr std::array ASTC_DESCRIPTOR_SET_BINDINGS{{ +constexpr std::array ASTC_DESCRIPTOR_SET_BINDINGS{{ { .binding = ASTC_BINDING_INPUT_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, @@ -83,13 +80,6 @@ constexpr std::array ASTC_DESCRIPTOR_SET_BINDIN .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = nullptr, }, - { - .binding = 
ASTC_BINDING_ENC_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, { .binding = ASTC_BINDING_SWIZZLE_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, @@ -108,12 +98,12 @@ constexpr std::array ASTC_DESCRIPTOR_SET_BINDIN constexpr DescriptorBankInfo ASTC_BANK_INFO{ .uniform_buffers = 0, - .storage_buffers = 3, + .storage_buffers = 2, .texture_buffers = 0, .image_buffers = 0, .textures = 0, .images = 1, - .score = 4, + .score = 3, }; constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ @@ -135,14 +125,6 @@ constexpr std::array .offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry), .stride = sizeof(DescriptorUpdateEntry), }, - { - .dstBinding = ASTC_BINDING_ENC_BUFFER, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry), - .stride = sizeof(DescriptorUpdateEntry), - }, { .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, .dstArrayElement = 0, @@ -355,7 +337,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, ASTCDecoderPass::~ASTCDecoderPass() = default; void ASTCDecoderPass::MakeDataBuffer() { - constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE); + constexpr size_t TOTAL_BUFFER_SIZE = sizeof(SWIZZLE_TABLE); data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -369,11 +351,7 @@ void ASTCDecoderPass::MakeDataBuffer() { data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); - std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES, - sizeof(ASTC_ENCODINGS_VALUES)); - // Tack on the swizzle table at the end 
of the buffer - std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE, - sizeof(SWIZZLE_TABLE)); + std::memcpy(staging_ref.mapped_span.data(), &SWIZZLE_TABLE, sizeof(SWIZZLE_TABLE)); scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) { @@ -443,9 +421,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); - update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES)); - update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), - sizeof(SWIZZLE_TABLE)); + update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(SWIZZLE_TABLE)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); const void* const descriptor_data{update_descriptor_queue.UpdateData()}; -- cgit v1.2.3 From 5ab80535118e593ef3add3ce2b5935437e1dc1d3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 31 Jul 2021 22:24:15 -0400 Subject: astc_decoder: Compute offset swizzles in-shader Alleviates the dependency on the swizzle table and a uniform which is constant for all ASTC texture sizes. 
--- src/video_core/host_shaders/astc_decoder.comp | 46 +++++---------- src/video_core/renderer_opengl/util_shaders.cpp | 16 +++--- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 67 ++-------------------- src/video_core/renderer_vulkan/vk_compute_pass.h | 5 -- 4 files changed, 25 insertions(+), 109 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 392f09c68..74ce058a9 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -10,8 +10,7 @@ #define END_PUSH_CONSTANTS }; #define UNIFORM(n) #define BINDING_INPUT_BUFFER 0 -#define BINDING_SWIZZLE_BUFFER 1 -#define BINDING_OUTPUT_IMAGE 2 +#define BINDING_OUTPUT_IMAGE 1 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv @@ -19,7 +18,6 @@ #define END_PUSH_CONSTANTS #define UNIFORM(n) layout(location = n) uniform #define BINDING_INPUT_BUFFER 0 -#define BINDING_SWIZZLE_BUFFER 1 #define BINDING_OUTPUT_IMAGE 0 #endif @@ -28,13 +26,11 @@ layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; BEGIN_PUSH_CONSTANTS UNIFORM(1) uvec2 block_dims; - -UNIFORM(2) uint bytes_per_block_log2; -UNIFORM(3) uint layer_stride; -UNIFORM(4) uint block_size; -UNIFORM(5) uint x_shift; -UNIFORM(6) uint block_height; -UNIFORM(7) uint block_height_mask; +UNIFORM(2) uint layer_stride; +UNIFORM(3) uint block_size; +UNIFORM(4) uint x_shift; +UNIFORM(5) uint block_height; +UNIFORM(6) uint block_height_mask; END_PUSH_CONSTANTS struct EncodingData { @@ -53,35 +49,17 @@ struct TexelWeightParams { bool void_extent_hdr; }; -// Swizzle data -layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { - uint swizzle_table[]; -}; - layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uvec4 astc_data[]; }; layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; -const uint GOB_SIZE_X = 64; -const uint 
GOB_SIZE_Y = 8; -const uint GOB_SIZE_Z = 1; -const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; - const uint GOB_SIZE_X_SHIFT = 6; const uint GOB_SIZE_Y_SHIFT = 3; -const uint GOB_SIZE_Z_SHIFT = 0; -const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; - -const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); - -const int BLOCK_SIZE_IN_BYTES = 16; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT; -const int BLOCK_INFO_ERROR = 0; -const int BLOCK_INFO_VOID_EXTENT_HDR = 1; -const int BLOCK_INFO_VOID_EXTENT_LDR = 2; -const int BLOCK_INFO_NORMAL = 3; +const uint BYTES_PER_BLOCK_LOG2 = 4; const int JUST_BITS = 0; const int QUINT = 1; @@ -168,8 +146,10 @@ int texel_vector_index = 0; uint unquantized_texel_weights[2][144]; uint SwizzleOffset(uvec2 pos) { - pos = pos & SWIZZLE_MASK; - return swizzle_table[pos.y * 64 + pos.x]; + uint x = pos.x; + uint y = pos.y; + return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + + (y % 2) * 16 + (x % 16); } // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] @@ -1253,7 +1233,7 @@ void DecompressBlock(ivec3 coord) { void main() { uvec3 pos = gl_GlobalInvocationID; - pos.x <<= bytes_per_block_log2; + pos.x <<= BYTES_PER_BLOCK_LOG2; // Read as soon as possible due to its latency const uint swizzle = SwizzleOffset(pos.xy); diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index a2b264700..4e6f7cb00 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -68,7 +68,6 @@ UtilShaders::~UtilShaders() = default; void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, std::span swizzles) { static constexpr GLuint BINDING_INPUT_BUFFER = 0; - static constexpr GLuint BINDING_SWIZZLE_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; const Extent2D tile_size{ @@ -76,10 +75,9 @@ void 
UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; program_manager.BindComputeProgram(astc_decoder_program.handle); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); - glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(1, tile_size.width, tile_size.height); + // Ensure buffer data is valid before dispatching glFlush(); for (const SwizzleParameters& swizzle : swizzles) { @@ -90,13 +88,13 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); ASSERT(params.destination == (std::array{0, 0, 0})); + ASSERT(params.bytes_per_block_log2 == 4); - glUniform1ui(2, params.bytes_per_block_log2); - glUniform1ui(3, params.layer_stride); - glUniform1ui(4, params.block_size); - glUniform1ui(5, params.x_shift); - glUniform1ui(6, params.block_height); - glUniform1ui(7, params.block_height_mask); + glUniform1ui(2, params.layer_stride); + glUniform1ui(3, params.block_size); + glUniform1ui(4, params.x_shift); + glUniform1ui(5, params.block_height); + glUniform1ui(6, params.block_height_mask); // ASTC texture data glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 328813a57..d13d58e8c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -34,9 +34,8 @@ using Tegra::Texture::SWIZZLE_TABLE; namespace { constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; -constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 1; -constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 2; -constexpr size_t ASTC_NUM_BINDINGS = 3; +constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 1; +constexpr size_t 
ASTC_NUM_BINDINGS = 2; template inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ @@ -80,13 +79,6 @@ constexpr std::array ASTC_DESCR .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = nullptr, }, - { - .binding = ASTC_BINDING_SWIZZLE_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, { .binding = ASTC_BINDING_OUTPUT_IMAGE, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, @@ -98,12 +90,12 @@ constexpr std::array ASTC_DESCR constexpr DescriptorBankInfo ASTC_BANK_INFO{ .uniform_buffers = 0, - .storage_buffers = 2, + .storage_buffers = 1, .texture_buffers = 0, .image_buffers = 0, .textures = 0, .images = 1, - .score = 3, + .score = 2, }; constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ @@ -125,14 +117,6 @@ constexpr std::array .offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry), .stride = sizeof(DescriptorUpdateEntry), }, - { - .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = ASTC_BINDING_SWIZZLE_BUFFER * sizeof(DescriptorUpdateEntry), - .stride = sizeof(DescriptorUpdateEntry), - }, { .dstBinding = ASTC_BINDING_OUTPUT_IMAGE, .dstArrayElement = 0, @@ -145,7 +129,6 @@ constexpr std::array struct AstcPushConstants { std::array blocks_dims; - u32 bytes_per_block_log2; u32 layer_stride; u32 block_size; u32 x_shift; @@ -336,42 +319,6 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, ASTCDecoderPass::~ASTCDecoderPass() = default; -void ASTCDecoderPass::MakeDataBuffer() { - constexpr size_t TOTAL_BUFFER_SIZE = sizeof(SWIZZLE_TABLE); - data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = TOTAL_BUFFER_SIZE, - .usage = 
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); - data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); - - const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); - std::memcpy(staging_ref.mapped_span.data(), &SWIZZLE_TABLE, sizeof(SWIZZLE_TABLE)); - - scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, - TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) { - static constexpr VkMemoryBarrier write_barrier{ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - }; - const VkBufferCopy copy{ - .srcOffset = offset, - .dstOffset = 0, - .size = TOTAL_BUFFER_SIZE, - }; - cmdbuf.CopyBuffer(src, dst, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, write_barrier); - }); -} - void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, std::span swizzles) { using namespace VideoCommon::Accelerated; @@ -380,9 +327,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, VideoCore::Surface::DefaultBlockHeight(image.info.format), }; scheduler.RequestOutsideRenderPassOperationContext(); - if (!data_buffer) { - MakeDataBuffer(); - } const VkPipeline vk_pipeline = *pipeline; const VkImageAspectFlags aspect_mask = image.AspectMask(); const VkImage vk_image = image.Handle(); @@ -421,7 +365,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); - update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(SWIZZLE_TABLE)); 
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); const void* const descriptor_data{update_descriptor_queue.UpdateData()}; @@ -429,11 +372,11 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); ASSERT(params.destination == (std::array{0, 0, 0})); + ASSERT(params.bytes_per_block_log2 == 4); scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, params, descriptor_data](vk::CommandBuffer cmdbuf) { const AstcPushConstants uniforms{ .blocks_dims = block_dims, - .bytes_per_block_log2 = params.bytes_per_block_log2, .layer_stride = params.layer_stride, .block_size = params.block_size, .x_shift = params.x_shift, diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 114aef2bd..c7b92cce0 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -96,15 +96,10 @@ public: std::span swizzles); private: - void MakeDataBuffer(); - VKScheduler& scheduler; StagingBufferPool& staging_buffer_pool; VKUpdateDescriptorQueue& update_descriptor_queue; MemoryAllocator& memory_allocator; - - vk::Buffer data_buffer; - MemoryCommit data_buffer_commit; }; } // namespace Vulkan -- cgit v1.2.3 From c439fc9be994583801418743ab202fb63d1c83a0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 31 Jul 2021 23:55:20 -0400 Subject: astc_decoder: Reduce workgroup size This reduces the amount of over dispatching when there are odd dimensions (i.e. ASTC 8x5), which rarely evenly divide into 32x32. 
--- src/video_core/host_shaders/astc_decoder.comp | 2 +- src/video_core/renderer_opengl/util_shaders.cpp | 4 ++-- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 74ce058a9..f34c5f5d9 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -22,7 +22,7 @@ #endif -layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; BEGIN_PUSH_CONSTANTS UNIFORM(1) uvec2 block_dims; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 4e6f7cb00..333f35a1c 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -82,8 +82,8 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, glFlush(); for (const SwizzleParameters& swizzle : swizzles) { const size_t input_offset = swizzle.buffer_offset + map.offset; - const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); - const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); + const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U); + const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U); const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index d13d58e8c..3e96c0f60 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -358,8 +358,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, }); for (const 
VideoCommon::SwizzleParameters& swizzle : swizzles) { const size_t input_offset = swizzle.buffer_offset + map.offset; - const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); - const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); + const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U); + const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U); const u32 num_dispatches_z = image.info.resources.layers; update_descriptor_queue.Acquire(); -- cgit v1.2.3 From f9563c8f248677894b886373f18c016fb189e416 Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Thu, 5 Aug 2021 13:52:30 +0000 Subject: texture_cache: Split templates out --- src/video_core/CMakeLists.txt | 3 + .../renderer_opengl/gl_texture_cache.cpp | 5 +- .../renderer_opengl/gl_texture_cache_templates.cpp | 10 + .../renderer_vulkan/vk_texture_cache.cpp | 2 +- .../renderer_vulkan/vk_texture_cache_templates.cpp | 10 + src/video_core/texture_cache/texture_cache.h | 1528 +------------------- .../texture_cache/texture_cache_templates.h | 1507 +++++++++++++++++++ 7 files changed, 1533 insertions(+), 1532 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_texture_cache_templates.cpp create mode 100644 src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp create mode 100644 src/video_core/texture_cache/texture_cache_templates.h (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1eb67c051..1250cca6f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -97,6 +97,7 @@ add_library(video_core STATIC renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h + renderer_opengl/gl_texture_cache_templates.cpp renderer_opengl/gl_query_cache.cpp renderer_opengl/gl_query_cache.h renderer_opengl/maxwell_to_gl.h @@ -155,6 +156,7 @@ 
add_library(video_core STATIC renderer_vulkan/vk_swapchain.h renderer_vulkan/vk_texture_cache.cpp renderer_vulkan/vk_texture_cache.h + renderer_vulkan/vk_texture_cache_templates.cpp renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h shader_cache.cpp @@ -186,6 +188,7 @@ add_library(video_core STATIC texture_cache/samples_helper.h texture_cache/slot_vector.h texture_cache/texture_cache.h + texture_cache/texture_cache_templates.h texture_cache/types.h texture_cache/util.cpp texture_cache/util.h diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index c373c9cb4..26b423f5e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1,4 +1,4 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -18,10 +18,7 @@ #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/surface.h" -#include "video_core/texture_cache/format_lookup_table.h" #include "video_core/texture_cache/samples_helper.h" -#include "video_core/texture_cache/texture_cache.h" -#include "video_core/textures/decoders.h" namespace OpenGL { namespace { diff --git a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp b/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp new file mode 100644 index 000000000..00ed06447 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/texture_cache/texture_cache_templates.h" + +namespace VideoCommon { +template class VideoCommon::TextureCache; +} diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8e029bcb3..b0496556d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1,4 +1,4 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp b/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp new file mode 100644 index 000000000..fd8978954 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/texture_cache/texture_cache_templates.h" + +namespace VideoCommon { +template class VideoCommon::TextureCache; +} diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f34c9d9ca..a4f6e9422 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1,4 +1,4 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -164,14 +164,6 @@ public: const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy); - /// Invalidate the contents of the color buffer index - /// These contents become unspecified, the cache can assume aggressive optimizations. 
- void InvalidateColorBuffer(size_t index); - - /// Invalidate the contents of the depth buffer - /// These contents become unspecified, the cache can assume aggressive optimizations. - void InvalidateDepthBuffer(); - /// Try to find a cached image view in the given CPU address [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); @@ -407,1522 +399,4 @@ private: typename SlotVector::Iterator deletion_iterator; }; -template -TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_) - : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { - // Configure null sampler - TSCEntry sampler_descriptor{}; - sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); - sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); - sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); - sampler_descriptor.cubemap_anisotropy.Assign(1); - - // Make sure the first index is reserved for the null resources - // This way the null resource becomes a compile time constant - void(slot_image_views.insert(runtime, NullImageParams{})); - void(slot_samplers.insert(runtime, sampler_descriptor)); - - deletion_iterator = slot_images.begin(); - - if constexpr (HAS_DEVICE_MEMORY_INFO) { - const auto device_memory = runtime.GetDeviceLocalMemory(); - const u64 possible_expected_memory = (device_memory * 3) / 10; - const u64 possible_critical_memory = (device_memory * 6) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); - minimum_memory = 0; - } else { - // on OGL we can be more conservatives as the driver takes care. - expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; - critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; - minimum_memory = expected_memory; - } -} - -template -void TextureCache

::RunGarbageCollector() { - const bool high_priority_mode = total_used_memory >= expected_memory; - const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; - int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_images.end()) { - deletion_iterator = slot_images.begin(); - if (deletion_iterator == slot_images.end()) { - break; - } - } - auto [image_id, image_tmp] = *deletion_iterator; - Image* image = image_tmp; // fix clang error. - const bool is_alias = True(image->flags & ImageFlagBits::Alias); - const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); - const bool must_download = image->IsSafeDownload(); - bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); - const u64 ticks_needed = - is_bad_overlap - ? ticks_to_destroy >> 4 - : ((should_care && aggressive_mode) ? 
ticks_to_destroy >> 1 : ticks_to_destroy); - should_care |= aggressive_mode; - if (should_care && image->frame_tick + ticks_needed < frame_tick) { - if (is_bad_overlap) { - const bool overlap_check = std::ranges::all_of( - image->overlapping_images, [&, image](const ImageId& overlap_id) { - auto& overlap = slot_images[overlap_id]; - return overlap.frame_tick >= image->frame_tick; - }); - if (!overlap_check) { - ++deletion_iterator; - continue; - } - } - if (!is_bad_overlap && must_download) { - const bool alias_check = std::ranges::none_of( - image->aliased_images, [&, image](const AliasedImage& alias) { - auto& alias_image = slot_images[alias.id]; - return (alias_image.frame_tick < image->frame_tick) || - (alias_image.modification_tick < image->modification_tick); - }); - - if (alias_check) { - auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image->info); - image->DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); - } - } - if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image, image_id); - } - UnregisterImage(image_id); - DeleteImage(image_id); - if (is_bad_overlap) { - ++num_iterations; - } - } - ++deletion_iterator; - } -} - -template -void TextureCache

::TickFrame() { - if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { - RunGarbageCollector(); - } - sentenced_images.Tick(); - sentenced_framebuffers.Tick(); - sentenced_image_view.Tick(); - ++frame_tick; -} - -template -const typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) const noexcept { - return slot_image_views[id]; -} - -template -typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { - return slot_image_views[id]; -} - -template -void TextureCache

::MarkModification(ImageId id) noexcept { - MarkModification(slot_images[id]); -} - -template -void TextureCache

::FillGraphicsImageViews(std::span indices, - std::span image_view_ids) { - FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); -} - -template -void TextureCache

::FillComputeImageViews(std::span indices, - std::span image_view_ids) { - FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); -} - -template -typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { - if (index > graphics_sampler_table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; - } - const auto [descriptor, is_new] = graphics_sampler_table.Read(index); - SamplerId& id = graphics_sampler_ids[index]; - if (is_new) { - id = FindSampler(descriptor); - } - return &slot_samplers[id]; -} - -template -typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { - if (index > compute_sampler_table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; - } - const auto [descriptor, is_new] = compute_sampler_table.Read(index); - SamplerId& id = compute_sampler_ids[index]; - if (is_new) { - id = FindSampler(descriptor); - } - return &slot_samplers[id]; -} - -template -void TextureCache

::SynchronizeGraphicsDescriptors() { - using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; - const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; - const u32 tic_limit = maxwell3d.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; - if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { - graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); - } - if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { - graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); - } -} - -template -void TextureCache

::SynchronizeComputeDescriptors() { - const bool linked_tsc = kepler_compute.launch_description.linked_tsc; - const u32 tic_limit = kepler_compute.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; - const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); - if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { - compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); - } - if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { - compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); - } -} - -template -void TextureCache

::UpdateRenderTargets(bool is_clear) { - using namespace VideoCommon::Dirty; - auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::RenderTargets]) { - for (size_t index = 0; index < NUM_RT; ++index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); - } - const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; - PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - return; - } - flags[Dirty::RenderTargets] = false; - - // Render target control is used on all render targets, so force look ups when this one is up - const bool force = flags[Dirty::RenderTargetControl]; - flags[Dirty::RenderTargetControl] = false; - - for (size_t index = 0; index < NUM_RT; ++index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - if (flags[Dirty::ColorBuffer0 + index] || force) { - flags[Dirty::ColorBuffer0 + index] = false; - BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); - } - PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); - } - if (flags[Dirty::ZetaBuffer] || force) { - flags[Dirty::ZetaBuffer] = false; - BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); - } - const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; - PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - - for (size_t index = 0; index < NUM_RT; ++index) { - render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); - } - render_targets.size = Extent2D{ - maxwell3d.regs.render_area.width, - maxwell3d.regs.render_area.height, - }; -} - -template -typename P::Framebuffer* TextureCache

::GetFramebuffer() { - return &slot_framebuffers[GetFramebufferId(render_targets)]; -} - -template -void TextureCache

::FillImageViews(DescriptorTable& table, - std::span cached_image_view_ids, - std::span indices, - std::span image_view_ids) { - ASSERT(indices.size() <= image_view_ids.size()); - do { - has_deleted_images = false; - std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { - return VisitImageView(table, cached_image_view_ids, index); - }); - } while (has_deleted_images); -} - -template -ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, - std::span cached_image_view_ids, - u32 index) { - if (index > table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); - return NULL_IMAGE_VIEW_ID; - } - const auto [descriptor, is_new] = table.Read(index); - ImageViewId& image_view_id = cached_image_view_ids[index]; - if (is_new) { - image_view_id = FindImageView(descriptor); - } - if (image_view_id != NULL_IMAGE_VIEW_ID) { - PrepareImageView(image_view_id, false, false); - } - return image_view_id; -} - -template -FramebufferId TextureCache

::GetFramebufferId(const RenderTargets& key) { - const auto [pair, is_new] = framebuffers.try_emplace(key); - FramebufferId& framebuffer_id = pair->second; - if (!is_new) { - return framebuffer_id; - } - std::array color_buffers; - std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), - [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); - ImageView* const depth_buffer = - key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; - framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); - return framebuffer_id; -} - -template -void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { - ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { - if (True(image.flags & ImageFlagBits::CpuModified)) { - return; - } - image.flags |= ImageFlagBits::CpuModified; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, image_id); - } - }); -} - -template -void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { - std::vector images; - ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { - if (!image.IsSafeDownload()) { - return; - } - image.flags &= ~ImageFlagBits::GpuModified; - images.push_back(image_id); - }); - if (images.empty()) { - return; - } - std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { - return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; - }); - for (const ImageId image_id : images) { - Image& image = slot_images[image_id]; - auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); - } -} - -template -void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { - std::vector deleted_images; - ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); - for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } - UnregisterImage(id); - DeleteImage(id); - } -} - -template -void TextureCache

::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { - std::vector deleted_images; - ForEachImageInRegionGPU(gpu_addr, size, - [&](ImageId id, Image&) { deleted_images.push_back(id); }); - for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Remapped)) { - continue; - } - image.flags |= ImageFlagBits::Remapped; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } - } -} - -template -void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy) { - const BlitImages images = GetBlitImages(dst, src); - const ImageId dst_id = images.dst_id; - const ImageId src_id = images.src_id; - PrepareImage(src_id, false, false); - PrepareImage(dst_id, true, false); - - ImageBase& dst_image = slot_images[dst_id]; - const ImageBase& src_image = slot_images[src_id]; - - // TODO: Deduplicate - const std::optional src_base = src_image.TryFindBase(src.Address()); - const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; - const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); - const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); - const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); - const Region2D src_region{ - Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, - Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, - }; - - const std::optional dst_base = dst_image.TryFindBase(dst.Address()); - const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; - const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); - const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); - const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); - const Region2D dst_region{ - Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, - Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, - }; - - // Always call this after src_framebuffer_id was queried, as the address might be invalidated. 
- Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; - if constexpr (FRAMEBUFFER_BLITS) { - // OpenGL blits from framebuffers, not images - Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; - runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, - copy.filter, copy.operation); - } else { - // Vulkan can blit images, but it lacks format reinterpretations - // Provide a framebuffer in case it's necessary - ImageView& dst_view = slot_image_views[dst_view_id]; - ImageView& src_view = slot_image_views[src_view_id]; - runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, - copy.operation); - } -} - -template -void TextureCache

::InvalidateColorBuffer(size_t index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - color_buffer_id = FindColorBuffer(index, false); - if (!color_buffer_id) { - LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index); - return; - } - // When invalidating a color buffer, the old contents are no longer relevant - ImageView& color_buffer = slot_image_views[color_buffer_id]; - Image& image = slot_images[color_buffer.image_id]; - image.flags &= ~ImageFlagBits::CpuModified; - image.flags &= ~ImageFlagBits::GpuModified; - - runtime.InvalidateColorBuffer(color_buffer, index); -} - -template -void TextureCache

::InvalidateDepthBuffer() { - ImageViewId& depth_buffer_id = render_targets.depth_buffer_id; - depth_buffer_id = FindDepthBuffer(false); - if (!depth_buffer_id) { - LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer"); - return; - } - // When invalidating the depth buffer, the old contents are no longer relevant - ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id]; - image.flags &= ~ImageFlagBits::CpuModified; - image.flags &= ~ImageFlagBits::GpuModified; - - ImageView& depth_buffer = slot_image_views[depth_buffer_id]; - runtime.InvalidateDepthBuffer(depth_buffer); -} - -template -typename P::ImageView* TextureCache

::TryFindFramebufferImageView(VAddr cpu_addr) { - // TODO: Properly implement this - const auto it = page_table.find(cpu_addr >> PAGE_BITS); - if (it == page_table.end()) { - return nullptr; - } - const auto& image_map_ids = it->second; - for (const ImageMapId map_id : image_map_ids) { - const ImageMapView& map = slot_map_views[map_id]; - const ImageBase& image = slot_images[map.image_id]; - if (image.cpu_addr != cpu_addr) { - continue; - } - if (image.image_view_ids.empty()) { - continue; - } - return &slot_image_views[image.image_view_ids.at(0)]; - } - return nullptr; -} - -template -bool TextureCache

::HasUncommittedFlushes() const noexcept { - return !uncommitted_downloads.empty(); -} - -template -bool TextureCache

::ShouldWaitAsyncFlushes() const noexcept { - return !committed_downloads.empty() && !committed_downloads.front().empty(); -} - -template -void TextureCache

::CommitAsyncFlushes() { - // This is intentionally passing the value by copy - committed_downloads.push(uncommitted_downloads); - uncommitted_downloads.clear(); -} - -template -void TextureCache

::PopAsyncFlushes() { - if (committed_downloads.empty()) { - return; - } - const std::span download_ids = committed_downloads.front(); - if (download_ids.empty()) { - committed_downloads.pop(); - return; - } - size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; - } - auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); - const size_t original_offset = download_map.offset; - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, copies); - download_map.offset += image.unswizzled_size_bytes; - } - // Wait for downloads to finish - runtime.Finish(); - - download_map.offset = original_offset; - std::span download_span = download_map.mapped_span; - for (const ImageId image_id : download_ids) { - const ImageBase& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); - download_map.offset += image.unswizzled_size_bytes; - download_span = download_span.subspan(image.unswizzled_size_bytes); - } - committed_downloads.pop(); -} - -template -bool TextureCache

::IsRegionGpuModified(VAddr addr, size_t size) { - bool is_modified = false; - ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { - if (False(image.flags & ImageFlagBits::GpuModified)) { - return false; - } - is_modified = true; - return true; - }); - return is_modified; -} - -template -void TextureCache

::RefreshContents(Image& image, ImageId image_id) { - if (False(image.flags & ImageFlagBits::CpuModified)) { - // Only upload modified images - return; - } - image.flags &= ~ImageFlagBits::CpuModified; - TrackImage(image, image_id); - - if (image.info.num_samples > 1) { - LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); - return; - } - auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); - UploadImageContents(image, staging); - runtime.InsertUploadMemoryBarrier(); -} - -template -template -void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) { - const std::span mapped_span = staging.mapped_span; - const GPUVAddr gpu_addr = image.gpu_addr; - - if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { - gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); - const auto uploads = FullUploadSwizzles(image.info); - runtime.AccelerateImageUpload(image, staging, uploads); - } else if (True(image.flags & ImageFlagBits::Converted)) { - std::vector unswizzled_data(image.unswizzled_size_bytes); - auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); - ConvertImage(unswizzled_data, image.info, mapped_span, copies); - image.UploadMemory(staging, copies); - } else { - const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); - image.UploadMemory(staging, copies); - } -} - -template -ImageViewId TextureCache

::FindImageView(const TICEntry& config) { - if (!IsValidEntry(gpu_memory, config)) { - return NULL_IMAGE_VIEW_ID; - } - const auto [pair, is_new] = image_views.try_emplace(config); - ImageViewId& image_view_id = pair->second; - if (is_new) { - image_view_id = CreateImageView(config); - } - return image_view_id; -} - -template -ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { - const ImageInfo info(config); - if (info.type == ImageType::Buffer) { - const ImageViewInfo view_info(config, 0); - return slot_image_views.insert(runtime, info, view_info, config.Address()); - } - const u32 layer_offset = config.BaseLayer() * info.layer_stride; - const GPUVAddr image_gpu_addr = config.Address() - layer_offset; - const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); - if (!image_id) { - return NULL_IMAGE_VIEW_ID; - } - ImageBase& image = slot_images[image_id]; - const SubresourceBase base = image.TryFindBase(config.Address()).value(); - ASSERT(base.level == 0); - const ImageViewInfo view_info(config, base.layer); - const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); - ImageViewBase& image_view = slot_image_views[image_view_id]; - image_view.flags |= ImageViewFlagBits::Strong; - image.flags |= ImageFlagBits::Strong; - return image_view_id; -} - -template -ImageId TextureCache

::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { - return image_id; - } - return InsertImage(info, gpu_addr, options); -} - -template -ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); - if (!cpu_addr) { - return ImageId{}; - } - } - const bool broken_views = runtime.HasBrokenTextureViewFormats(); - const bool native_bgr = runtime.HasNativeBgr(); - ImageId image_id; - const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { - if (True(existing_image.flags & ImageFlagBits::Remapped)) { - return false; - } - if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { - const bool strict_size = False(options & RelaxedOptions::Size) && - True(existing_image.flags & ImageFlagBits::Strong); - const ImageInfo& existing = existing_image.info; - if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && - existing.pitch == info.pitch && - IsPitchLinearSameSize(existing, info, strict_size) && - IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { - image_id = existing_image_id; - return true; - } - } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, - native_bgr)) { - image_id = existing_image_id; - return true; - } - return false; - }; - ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); - return image_id; -} - -template -ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - const auto size = CalculateGuestSizeInBytes(info); - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); - if (!cpu_addr) { - const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; - virtual_invalid_space += Common::AlignUp(size, 32); - cpu_addr = std::optional(fake_addr); - } - } - ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); - const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); - const Image& image = slot_images[image_id]; - // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different - const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); - if (is_new) { - it->second = slot_image_allocs.insert(); - } - slot_image_allocs[it->second].images.push_back(image_id); - return image_id; -} - -template -ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { - ImageInfo new_info = info; - const size_t size_bytes = CalculateGuestSizeInBytes(new_info); - const bool broken_views = runtime.HasBrokenTextureViewFormats(); - const bool native_bgr = runtime.HasNativeBgr(); - std::vector overlap_ids; - std::unordered_set overlaps_found; - std::vector left_aliased_ids; - std::vector right_aliased_ids; - std::unordered_set ignore_textures; - std::vector bad_overlap_ids; - const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { - if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); - return; - } - if (info.type == ImageType::Linear) { - if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { - // Alias linear images with the same pitch - left_aliased_ids.push_back(overlap_id); - } - return; - } - overlaps_found.insert(overlap_id); - static constexpr bool strict_size = true; - const std::optional solution = ResolveOverlap( - new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); - if (solution) { - gpu_addr = solution->gpu_addr; - cpu_addr = solution->cpu_addr; - new_info.resources = solution->resources; - overlap_ids.push_back(overlap_id); - return; - } - static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; - const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); - if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { - left_aliased_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::Alias; - } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, - broken_views, native_bgr)) { - right_aliased_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::Alias; - } else { - bad_overlap_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::BadOverlap; - } - }; - ForEachImageInRegion(cpu_addr, size_bytes, region_check); - const auto region_check_gpu = [&](ImageId 
overlap_id, ImageBase& overlap) { - if (!overlaps_found.contains(overlap_id)) { - if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); - } - if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { - ignore_textures.insert(overlap_id); - } - } - }; - ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); - const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); - Image& new_image = slot_images[new_image_id]; - - if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { - new_image.flags |= ImageFlagBits::Sparse; - } - - for (const ImageId overlap_id : ignore_textures) { - Image& overlap = slot_images[overlap_id]; - if (True(overlap.flags & ImageFlagBits::GpuModified)) { - UNIMPLEMENTED(); - } - if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); - } - UnregisterImage(overlap_id); - DeleteImage(overlap_id); - } - - // TODO: Only upload what we need - RefreshContents(new_image, new_image_id); - - for (const ImageId overlap_id : overlap_ids) { - Image& overlap = slot_images[overlap_id]; - if (overlap.info.num_samples != new_image.info.num_samples) { - LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); - } else { - const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); - const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); - runtime.CopyImage(new_image, overlap, copies); - } - if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); - } - UnregisterImage(overlap_id); - DeleteImage(overlap_id); - } - ImageBase& new_image_base = new_image; - for (const ImageId aliased_id : right_aliased_ids) { - ImageBase& aliased = slot_images[aliased_id]; - AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); - new_image.flags |= ImageFlagBits::Alias; - } - for (const ImageId aliased_id : 
left_aliased_ids) { - ImageBase& aliased = slot_images[aliased_id]; - AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); - new_image.flags |= ImageFlagBits::Alias; - } - for (const ImageId aliased_id : bad_overlap_ids) { - ImageBase& aliased = slot_images[aliased_id]; - aliased.overlapping_images.push_back(new_image_id); - new_image.overlapping_images.push_back(aliased_id); - new_image.flags |= ImageFlagBits::BadOverlap; - } - RegisterImage(new_image_id); - return new_image_id; -} - -template -typename TextureCache

::BlitImages TextureCache

::GetBlitImages( - const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { - static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; - const GPUVAddr dst_addr = dst.Address(); - const GPUVAddr src_addr = src.Address(); - ImageInfo dst_info(dst); - ImageInfo src_info(src); - ImageId dst_id; - ImageId src_id; - do { - has_deleted_images = false; - dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); - src_id = FindImage(src_info, src_addr, FIND_OPTIONS); - const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; - const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; - DeduceBlitImages(dst_info, src_info, dst_image, src_image); - if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { - continue; - } - if (!dst_id) { - dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); - } - if (!src_id) { - src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); - } - } while (has_deleted_images); - return BlitImages{ - .dst_id = dst_id, - .src_id = src_id, - .dst_format = dst_info.format, - .src_format = src_info.format, - }; -} - -template -SamplerId TextureCache

::FindSampler(const TSCEntry& config) { - if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { - return NULL_SAMPLER_ID; - } - const auto [pair, is_new] = samplers.try_emplace(config); - if (is_new) { - pair->second = slot_samplers.insert(runtime, config); - } - return pair->second; -} - -template -ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { - const auto& regs = maxwell3d.regs; - if (index >= regs.rt_control.count) { - return ImageViewId{}; - } - const auto& rt = regs.rt[index]; - const GPUVAddr gpu_addr = rt.Address(); - if (gpu_addr == 0) { - return ImageViewId{}; - } - if (rt.format == Tegra::RenderTargetFormat::NONE) { - return ImageViewId{}; - } - const ImageInfo info(regs, index); - return FindRenderTargetView(info, gpu_addr, is_clear); -} - -template -ImageViewId TextureCache

::FindDepthBuffer(bool is_clear) { - const auto& regs = maxwell3d.regs; - if (!regs.zeta_enable) { - return ImageViewId{}; - } - const GPUVAddr gpu_addr = regs.zeta.Address(); - if (gpu_addr == 0) { - return ImageViewId{}; - } - const ImageInfo info(regs); - return FindRenderTargetView(info, gpu_addr, is_clear); -} - -template -ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear) { - const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; - const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); - if (!image_id) { - return NULL_IMAGE_VIEW_ID; - } - Image& image = slot_images[image_id]; - const ImageViewType view_type = RenderTargetImageViewType(info); - SubresourceBase base; - if (image.info.type == ImageType::Linear) { - base = SubresourceBase{.level = 0, .layer = 0}; - } else { - base = image.TryFindBase(gpu_addr).value(); - } - const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; - const SubresourceRange range{ - .base = base, - .extent = {.levels = 1, .layers = layers}, - }; - return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); -} - -template -template -void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - boost::container::small_vector maps; - ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { - const auto it = page_table.find(page); - if (it == page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageMapId map_id : it->second) { - ImageMapView& map = slot_map_views[map_id]; - if (map.picked) { - continue; - } - if (!map.Overlaps(cpu_addr, size)) { - continue; - } - map.picked = true; - maps.push_back(map_id); - Image& image = slot_images[map.image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(map.image_id); - if constexpr (BOOL_BREAK) { - if (func(map.image_id, image)) { - return true; - } - } else { - func(map.image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } - for (const ImageMapId map_id : maps) { - slot_map_views[map_id].picked = false; - } -} - -template -template -void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = gpu_page_table.find(page); - if (it == gpu_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageId image_id : it->second) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - if (!image.OverlapsGPU(gpu_addr, size)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(image_id); - if constexpr (BOOL_BREAK) { - if (func(image_id, image)) { - return true; - } - } else { - func(image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } -} - -template -template -void TextureCache

::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = sparse_page_table.find(page); - if (it == sparse_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageId image_id : it->second) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - if (!image.OverlapsGPU(gpu_addr, size)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(image_id); - if constexpr (BOOL_BREAK) { - if (func(image_id, image)) { - return true; - } - } else { - func(image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } -} - -template -template -void TextureCache

::ForEachSparseSegment(ImageBase& image, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool RETURNS_BOOL = std::is_same_v; - const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); - for (auto& segment : segments) { - const auto gpu_addr = segment.first; - const auto size = segment.second; - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - if constexpr (RETURNS_BOOL) { - if (func(gpu_addr, *cpu_addr, size)) { - break; - } - } else { - func(gpu_addr, *cpu_addr, size); - } - } -} - -template -ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { - Image& image = slot_images[image_id]; - if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { - return image_view_id; - } - const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); - image.InsertView(info, image_view_id); - return image_view_id; -} - -template -void TextureCache

::RegisterImage(ImageId image_id) { - ImageBase& image = slot_images[image_id]; - ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), - "Trying to register an already registered image"); - image.flags |= ImageFlagBits::Registered; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory += Common::AlignUp(tentative_size, 1024); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); - if (False(image.flags & ImageFlagBits::Sparse)) { - auto map_id = - slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); - ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, - [this, map_id](u64 page) { page_table[page].push_back(map_id); }); - image.map_view_id = map_id; - return; - } - std::vector sparse_maps{}; - ForEachSparseSegment( - image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); - ForEachCPUPage(cpu_addr, size, - [this, map_id](u64 page) { page_table[page].push_back(map_id); }); - sparse_maps.push_back(map_id); - }); - sparse_views.emplace(image_id, std::move(sparse_maps)); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); -} - -template -void TextureCache

::UnregisterImage(ImageId image_id) { - Image& image = slot_images[image_id]; - ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), - "Trying to unregister an already registered image"); - image.flags &= ~ImageFlagBits::Registered; - image.flags &= ~ImageFlagBits::BadOverlap; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory -= Common::AlignUp(tentative_size, 1024); - const auto& clear_page_table = - [this, image_id]( - u64 page, - std::unordered_map, IdentityHash>& selected_page_table) { - const auto page_it = selected_page_table.find(page); - if (page_it == selected_page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_ids = page_it->second; - const auto vector_it = std::ranges::find(image_ids, image_id); - if (vector_it == image_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", - page << PAGE_BITS); - return; - } - image_ids.erase(vector_it); - }; - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); - if (False(image.flags & ImageFlagBits::Sparse)) { - const auto map_id = image.map_view_id; - ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_map_ids = page_it->second; - const auto vector_it = std::ranges::find(image_map_ids, map_id); - if (vector_it == image_map_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", 
- page << PAGE_BITS); - return; - } - image_map_ids.erase(vector_it); - }); - slot_map_views.erase(map_id); - return; - } - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { - clear_page_table(page, sparse_page_table); - }); - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map_range = slot_map_views[map_view_id]; - const VAddr cpu_addr = map_range.cpu_addr; - const std::size_t size = map_range.size; - ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_map_ids = page_it->second; - auto vector_it = image_map_ids.begin(); - while (vector_it != image_map_ids.end()) { - ImageMapView& map = slot_map_views[*vector_it]; - if (map.image_id != image_id) { - vector_it++; - continue; - } - if (!map.picked) { - map.picked = true; - } - vector_it = image_map_ids.erase(vector_it); - } - }); - slot_map_views.erase(map_view_id); - } - sparse_views.erase(it); -} - -template -void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { - ASSERT(False(image.flags & ImageFlagBits::Tracked)); - image.flags |= ImageFlagBits::Tracked; - if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); - return; - } - if (True(image.flags & ImageFlagBits::Registered)) { - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; - const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - } - return; - } - ForEachSparseSegment(image, - [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - }); -} - -template -void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { - ASSERT(True(image.flags & ImageFlagBits::Tracked)); - image.flags &= ~ImageFlagBits::Tracked; - if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); - return; - } - ASSERT(True(image.flags & ImageFlagBits::Registered)); - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; - const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); - } -} - -template -void TextureCache

::DeleteImage(ImageId image_id) { - ImageBase& image = slot_images[image_id]; - const GPUVAddr gpu_addr = image.gpu_addr; - const auto alloc_it = image_allocs_table.find(gpu_addr); - if (alloc_it == image_allocs_table.end()) { - UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", - gpu_addr); - return; - } - const ImageAllocId alloc_id = alloc_it->second; - std::vector& alloc_images = slot_image_allocs[alloc_id].images; - const auto alloc_image_it = std::ranges::find(alloc_images, image_id); - if (alloc_image_it == alloc_images.end()) { - UNREACHABLE_MSG("Trying to delete an image that does not exist"); - return; - } - ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); - ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); - - // Mark render targets as dirty - auto& dirty = maxwell3d.dirty.flags; - dirty[Dirty::RenderTargets] = true; - dirty[Dirty::ZetaBuffer] = true; - for (size_t rt = 0; rt < NUM_RT; ++rt) { - dirty[Dirty::ColorBuffer0 + rt] = true; - } - const std::span image_view_ids = image.image_view_ids; - for (const ImageViewId image_view_id : image_view_ids) { - std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); - if (render_targets.depth_buffer_id == image_view_id) { - render_targets.depth_buffer_id = ImageViewId{}; - } - } - RemoveImageViewReferences(image_view_ids); - RemoveFramebuffers(image_view_ids); - - for (const AliasedImage& alias : image.aliased_images) { - ImageBase& other_image = slot_images[alias.id]; - [[maybe_unused]] const size_t num_removed_aliases = - std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { - return other_alias.id == image_id; - }); - other_image.CheckAliasState(); - ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", - num_removed_aliases); - } - for (const ImageId overlap_id : image.overlapping_images) { - ImageBase& 
other_image = slot_images[overlap_id]; - [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( - other_image.overlapping_images, - [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); - other_image.CheckBadOverlapState(); - ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", - num_removed_overlaps); - } - for (const ImageViewId image_view_id : image_view_ids) { - sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); - slot_image_views.erase(image_view_id); - } - sentenced_images.Push(std::move(slot_images[image_id])); - slot_images.erase(image_id); - - alloc_images.erase(alloc_image_it); - if (alloc_images.empty()) { - image_allocs_table.erase(alloc_it); - } - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(compute_image_view_ids, CORRUPT_ID); - } - graphics_image_table.Invalidate(); - compute_image_table.Invalidate(); - has_deleted_images = true; -} - -template -void TextureCache

::RemoveImageViewReferences(std::span removed_views) { - auto it = image_views.begin(); - while (it != image_views.end()) { - const auto found = std::ranges::find(removed_views, it->second); - if (found != removed_views.end()) { - it = image_views.erase(it); - } else { - ++it; - } - } -} - -template -void TextureCache

::RemoveFramebuffers(std::span removed_views) { - auto it = framebuffers.begin(); - while (it != framebuffers.end()) { - if (it->first.Contains(removed_views)) { - it = framebuffers.erase(it); - } else { - ++it; - } - } -} - -template -void TextureCache

::MarkModification(ImageBase& image) noexcept { - image.flags |= ImageFlagBits::GpuModified; - image.modification_tick = ++modification_tick; -} - -template -void TextureCache

::SynchronizeAliases(ImageId image_id) { - boost::container::small_vector aliased_images; - ImageBase& image = slot_images[image_id]; - u64 most_recent_tick = image.modification_tick; - for (const AliasedImage& aliased : image.aliased_images) { - ImageBase& aliased_image = slot_images[aliased.id]; - if (image.modification_tick < aliased_image.modification_tick) { - most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); - aliased_images.push_back(&aliased); - } - } - if (aliased_images.empty()) { - return; - } - image.modification_tick = most_recent_tick; - std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { - const ImageBase& lhs_image = slot_images[lhs->id]; - const ImageBase& rhs_image = slot_images[rhs->id]; - return lhs_image.modification_tick < rhs_image.modification_tick; - }); - for (const AliasedImage* const aliased : aliased_images) { - CopyImage(image_id, aliased->id, aliased->copies); - } -} - -template -void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { - Image& image = slot_images[image_id]; - if (invalidate) { - image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); - if (False(image.flags & ImageFlagBits::Tracked)) { - TrackImage(image, image_id); - } - } else { - RefreshContents(image, image_id); - SynchronizeAliases(image_id); - } - if (is_modification) { - MarkModification(image); - } - image.frame_tick = frame_tick; -} - -template -void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modification, - bool invalidate) { - if (!image_view_id) { - return; - } - const ImageViewBase& image_view = slot_image_views[image_view_id]; - if (image_view.IsBuffer()) { - return; - } - PrepareImage(image_view.image_id, is_modification, invalidate); -} - -template -void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::span copies) { - Image& dst = slot_images[dst_id]; - Image& src = slot_images[src_id]; - const auto dst_format_type = GetFormatType(dst.info.format); - const auto src_format_type = GetFormatType(src.info.format); - if (src_format_type == dst_format_type) { - if constexpr (HAS_EMULATED_COPIES) { - if (!runtime.CanImageBeCopied(dst, src)) { - return runtime.EmulateCopyImage(dst, src, copies); - } - } - return runtime.CopyImage(dst, src, copies); - } - UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); - UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); - for (const ImageCopy& copy : copies) { - UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); - UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); - UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); - UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); - - const SubresourceBase dst_base{ - .level = copy.dst_subresource.base_level, - .layer = copy.dst_subresource.base_layer, - }; - const SubresourceBase src_base{ - .level = copy.src_subresource.base_level, - .layer = copy.src_subresource.base_layer, - }; - const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; - const SubresourceExtent src_extent{.levels = 1, .layers = 1}; - const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; - const SubresourceRange src_range{.base = src_base, .extent = src_extent}; - const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); - const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); - const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); - Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; - const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); - ImageView& dst_view = slot_image_views[dst_view_id]; - ImageView& src_view = slot_image_views[src_view_id]; - [[maybe_unused]] const Extent3D expected_size{ - .width = 
std::min(dst_view.size.width, src_view.size.width), - .height = std::min(dst_view.size.height, src_view.size.height), - .depth = std::min(dst_view.size.depth, src_view.size.depth), - }; - UNIMPLEMENTED_IF(copy.extent != expected_size); - - runtime.ConvertImage(dst_framebuffer, dst_view, src_view); - } -} - -template -void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { - if (*old_id == new_id) { - return; - } - if (*old_id) { - const ImageViewBase& old_view = slot_image_views[*old_id]; - if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { - uncommitted_downloads.push_back(old_view.image_id); - } - } - *old_id = new_id; -} - -template -std::pair TextureCache

::RenderTargetFromImage( - ImageId image_id, const ImageViewInfo& view_info) { - const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); - const ImageBase& image = slot_images[image_id]; - const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; - const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; - const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; - const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); - const u32 num_samples = image.info.num_samples; - const auto [samples_x, samples_y] = SamplesLog2(num_samples); - const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ - .color_buffer_ids = {color_view_id}, - .depth_buffer_id = depth_view_id, - .size = {extent.width >> samples_x, extent.height >> samples_y}, - }); - return {framebuffer_id, view_id}; -} - -template -bool TextureCache

::IsFullClear(ImageViewId id) { - if (!id) { - return true; - } - const ImageViewBase& image_view = slot_image_views[id]; - const ImageBase& image = slot_images[image_view.image_id]; - const Extent3D size = image_view.size; - const auto& regs = maxwell3d.regs; - const auto& scissor = regs.scissor_test[0]; - if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { - // Images with multiple resources can't be cleared in a single call - return false; - } - if (regs.clear_flags.scissor == 0) { - // If scissor testing is disabled, the clear is always full - return true; - } - // Make sure the clear covers all texels in the subresource - return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && - scissor.max_y >= size.height; -} - } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_templates.h b/src/video_core/texture_cache/texture_cache_templates.h new file mode 100644 index 000000000..8440d23d1 --- /dev/null +++ b/src/video_core/texture_cache/texture_cache_templates.h @@ -0,0 +1,1507 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/texture_cache/texture_cache.h" + +namespace VideoCommon { + +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCore::Surface::GetFormatType; +using VideoCore::Surface::IsCopyCompatible; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::SurfaceType; +using namespace Common::Literals; + +template +TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_) + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { + // Configure null sampler + TSCEntry sampler_descriptor{}; + sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); + sampler_descriptor.cubemap_anisotropy.Assign(1); + + // Make sure the first index is reserved for the null resources + // This way the null resource becomes a compile time constant + void(slot_image_views.insert(runtime, NullImageParams{})); + void(slot_samplers.insert(runtime, sampler_descriptor)); + + deletion_iterator = slot_images.begin(); + + if constexpr (HAS_DEVICE_MEMORY_INFO) { + const auto device_memory = runtime.GetDeviceLocalMemory(); + const u64 possible_expected_memory = (device_memory * 3) / 10; + const u64 possible_critical_memory = (device_memory * 6) / 10; + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + minimum_memory = 0; + } else { + // on OGL we can be more conservatives as the driver takes care. + expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; + critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; + minimum_memory = expected_memory; + } +} + +template +void TextureCache

::RunGarbageCollector() { + const bool high_priority_mode = total_used_memory >= expected_memory; + const bool aggressive_mode = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; + int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); + for (; num_iterations > 0; --num_iterations) { + if (deletion_iterator == slot_images.end()) { + deletion_iterator = slot_images.begin(); + if (deletion_iterator == slot_images.end()) { + break; + } + } + auto [image_id, image_tmp] = *deletion_iterator; + Image* image = image_tmp; // fix clang error. + const bool is_alias = True(image->flags & ImageFlagBits::Alias); + const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); + const bool must_download = image->IsSafeDownload(); + bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); + const u64 ticks_needed = + is_bad_overlap + ? ticks_to_destroy >> 4 + : ((should_care && aggressive_mode) ? 
ticks_to_destroy >> 1 : ticks_to_destroy); + should_care |= aggressive_mode; + if (should_care && image->frame_tick + ticks_needed < frame_tick) { + if (is_bad_overlap) { + const bool overlap_check = std::ranges::all_of( + image->overlapping_images, [&, image](const ImageId& overlap_id) { + auto& overlap = slot_images[overlap_id]; + return overlap.frame_tick >= image->frame_tick; + }); + if (!overlap_check) { + ++deletion_iterator; + continue; + } + } + if (!is_bad_overlap && must_download) { + const bool alias_check = std::ranges::none_of( + image->aliased_images, [&, image](const AliasedImage& alias) { + auto& alias_image = slot_images[alias.id]; + return (alias_image.frame_tick < image->frame_tick) || + (alias_image.modification_tick < image->modification_tick); + }); + + if (alias_check) { + auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image->info); + image->DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); + } + } + if (True(image->flags & ImageFlagBits::Tracked)) { + UntrackImage(*image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id); + if (is_bad_overlap) { + ++num_iterations; + } + } + ++deletion_iterator; + } +} + +template +void TextureCache

::TickFrame() { + if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { + RunGarbageCollector(); + } + sentenced_images.Tick(); + sentenced_framebuffers.Tick(); + sentenced_image_view.Tick(); + ++frame_tick; +} + +template +const typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) const noexcept { + return slot_image_views[id]; +} + +template +typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { + return slot_image_views[id]; +} + +template +void TextureCache

::MarkModification(ImageId id) noexcept { + MarkModification(slot_images[id]); +} + +template +void TextureCache

::FillGraphicsImageViews(std::span indices, + std::span image_view_ids) { + FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); +} + +template +void TextureCache

::FillComputeImageViews(std::span indices, + std::span image_view_ids) { + FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); +} + +template +typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { + if (index > graphics_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = graphics_sampler_table.Read(index); + SamplerId& id = graphics_sampler_ids[index]; + if (is_new) { + id = FindSampler(descriptor); + } + return &slot_samplers[id]; +} + +template +typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { + if (index > compute_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = compute_sampler_table.Read(index); + SamplerId& id = compute_sampler_ids[index]; + if (is_new) { + id = FindSampler(descriptor); + } + return &slot_samplers[id]; +} + +template +void TextureCache

::SynchronizeGraphicsDescriptors() { + using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; + const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; + const u32 tic_limit = maxwell3d.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; + if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { + graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + } + if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { + graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} + +template +void TextureCache

::SynchronizeComputeDescriptors() { + const bool linked_tsc = kepler_compute.launch_description.linked_tsc; + const u32 tic_limit = kepler_compute.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; + const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); + if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { + compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + } + if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { + compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} + +template +void TextureCache

::UpdateRenderTargets(bool is_clear) { + using namespace VideoCommon::Dirty; + auto& flags = maxwell3d.dirty.flags; + if (!flags[Dirty::RenderTargets]) { + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); + return; + } + flags[Dirty::RenderTargets] = false; + + // Render target control is used on all render targets, so force look ups when this one is up + const bool force = flags[Dirty::RenderTargetControl]; + flags[Dirty::RenderTargetControl] = false; + + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + if (flags[Dirty::ColorBuffer0 + index] || force) { + flags[Dirty::ColorBuffer0 + index] = false; + BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + } + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + if (flags[Dirty::ZetaBuffer] || force) { + flags[Dirty::ZetaBuffer] = false; + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); + + for (size_t index = 0; index < NUM_RT; ++index) { + render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); + } + render_targets.size = Extent2D{ + maxwell3d.regs.render_area.width, + maxwell3d.regs.render_area.height, + }; +} + +template +typename P::Framebuffer* TextureCache

::GetFramebuffer() { + return &slot_framebuffers[GetFramebufferId(render_targets)]; +} + +template +void TextureCache

::FillImageViews(DescriptorTable& table, + std::span cached_image_view_ids, + std::span indices, + std::span image_view_ids) { + ASSERT(indices.size() <= image_view_ids.size()); + do { + has_deleted_images = false; + std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { + return VisitImageView(table, cached_image_view_ids, index); + }); + } while (has_deleted_images); +} + +template +ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, + std::span cached_image_view_ids, + u32 index) { + if (index > table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); + return NULL_IMAGE_VIEW_ID; + } + const auto [descriptor, is_new] = table.Read(index); + ImageViewId& image_view_id = cached_image_view_ids[index]; + if (is_new) { + image_view_id = FindImageView(descriptor); + } + if (image_view_id != NULL_IMAGE_VIEW_ID) { + PrepareImageView(image_view_id, false, false); + } + return image_view_id; +} + +template +FramebufferId TextureCache

::GetFramebufferId(const RenderTargets& key) { + const auto [pair, is_new] = framebuffers.try_emplace(key); + FramebufferId& framebuffer_id = pair->second; + if (!is_new) { + return framebuffer_id; + } + std::array color_buffers; + std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), + [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); + ImageView* const depth_buffer = + key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; + framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); + return framebuffer_id; +} + +template +void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { + ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { + if (True(image.flags & ImageFlagBits::CpuModified)) { + return; + } + image.flags |= ImageFlagBits::CpuModified; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + }); +} + +template +void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { + std::vector images; + ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { + if (!image.IsSafeDownload()) { + return; + } + image.flags &= ~ImageFlagBits::GpuModified; + images.push_back(image_id); + }); + if (images.empty()) { + return; + } + std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { + return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; + }); + for (const ImageId image_id : images) { + Image& image = slot_images[image_id]; + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + } +} + +template +void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { + std::vector deleted_images; + ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + UnregisterImage(id); + DeleteImage(id); + } +} + +template +void TextureCache

::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { + std::vector deleted_images; + ForEachImageInRegionGPU(gpu_addr, size, + [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Remapped)) { + continue; + } + image.flags |= ImageFlagBits::Remapped; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + } +} + +template +void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy) { + const BlitImages images = GetBlitImages(dst, src); + const ImageId dst_id = images.dst_id; + const ImageId src_id = images.src_id; + PrepareImage(src_id, false, false); + PrepareImage(dst_id, true, false); + + ImageBase& dst_image = slot_images[dst_id]; + const ImageBase& src_image = slot_images[src_id]; + + // TODO: Deduplicate + const std::optional src_base = src_image.TryFindBase(src.Address()); + const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; + const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); + const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); + const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); + const Region2D src_region{ + Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, + Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, + }; + + const std::optional dst_base = dst_image.TryFindBase(dst.Address()); + const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); + const Region2D dst_region{ + Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, + Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, + }; + + // Always call this after src_framebuffer_id was queried, as the address might be invalidated. 
+ Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + if constexpr (FRAMEBUFFER_BLITS) { + // OpenGL blits from framebuffers, not images + Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; + runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, + copy.filter, copy.operation); + } else { + // Vulkan can blit images, but it lacks format reinterpretations + // Provide a framebuffer in case it's necessary + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, + copy.operation); + } +} + +template +typename P::ImageView* TextureCache

::TryFindFramebufferImageView(VAddr cpu_addr) { + // TODO: Properly implement this + const auto it = page_table.find(cpu_addr >> PAGE_BITS); + if (it == page_table.end()) { + return nullptr; + } + const auto& image_map_ids = it->second; + for (const ImageMapId map_id : image_map_ids) { + const ImageMapView& map = slot_map_views[map_id]; + const ImageBase& image = slot_images[map.image_id]; + if (image.cpu_addr != cpu_addr) { + continue; + } + if (image.image_view_ids.empty()) { + continue; + } + return &slot_image_views[image.image_view_ids.at(0)]; + } + return nullptr; +} + +template +bool TextureCache

::HasUncommittedFlushes() const noexcept { + return !uncommitted_downloads.empty(); +} + +template +bool TextureCache

::ShouldWaitAsyncFlushes() const noexcept { + return !committed_downloads.empty() && !committed_downloads.front().empty(); +} + +template +void TextureCache

::CommitAsyncFlushes() { + // This is intentionally passing the value by copy + committed_downloads.push(uncommitted_downloads); + uncommitted_downloads.clear(); +} + +template +void TextureCache

::PopAsyncFlushes() { + if (committed_downloads.empty()) { + return; + } + const std::span download_ids = committed_downloads.front(); + if (download_ids.empty()) { + committed_downloads.pop(); + return; + } + size_t total_size_bytes = 0; + for (const ImageId image_id : download_ids) { + total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + } + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); + const size_t original_offset = download_map.offset; + for (const ImageId image_id : download_ids) { + Image& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, copies); + download_map.offset += image.unswizzled_size_bytes; + } + // Wait for downloads to finish + runtime.Finish(); + + download_map.offset = original_offset; + std::span download_span = download_map.mapped_span; + for (const ImageId image_id : download_ids) { + const ImageBase& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); + download_map.offset += image.unswizzled_size_bytes; + download_span = download_span.subspan(image.unswizzled_size_bytes); + } + committed_downloads.pop(); +} + +template +bool TextureCache

::IsRegionGpuModified(VAddr addr, size_t size) { + bool is_modified = false; + ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { + if (False(image.flags & ImageFlagBits::GpuModified)) { + return false; + } + is_modified = true; + return true; + }); + return is_modified; +} + +template +void TextureCache

::RefreshContents(Image& image, ImageId image_id) { + if (False(image.flags & ImageFlagBits::CpuModified)) { + // Only upload modified images + return; + } + image.flags &= ~ImageFlagBits::CpuModified; + TrackImage(image, image_id); + + if (image.info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); + return; + } + auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); + UploadImageContents(image, staging); + runtime.InsertUploadMemoryBarrier(); +} + +template +template +void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) { + const std::span mapped_span = staging.mapped_span; + const GPUVAddr gpu_addr = image.gpu_addr; + + if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { + gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + const auto uploads = FullUploadSwizzles(image.info); + runtime.AccelerateImageUpload(image, staging, uploads); + } else if (True(image.flags & ImageFlagBits::Converted)) { + std::vector unswizzled_data(image.unswizzled_size_bytes); + auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); + ConvertImage(unswizzled_data, image.info, mapped_span, copies); + image.UploadMemory(staging, copies); + } else { + const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); + image.UploadMemory(staging, copies); + } +} + +template +ImageViewId TextureCache

::FindImageView(const TICEntry& config) { + if (!IsValidEntry(gpu_memory, config)) { + return NULL_IMAGE_VIEW_ID; + } + const auto [pair, is_new] = image_views.try_emplace(config); + ImageViewId& image_view_id = pair->second; + if (is_new) { + image_view_id = CreateImageView(config); + } + return image_view_id; +} + +template +ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { + const ImageInfo info(config); + if (info.type == ImageType::Buffer) { + const ImageViewInfo view_info(config, 0); + return slot_image_views.insert(runtime, info, view_info, config.Address()); + } + const u32 layer_offset = config.BaseLayer() * info.layer_stride; + const GPUVAddr image_gpu_addr = config.Address() - layer_offset; + const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + ImageBase& image = slot_images[image_id]; + const SubresourceBase base = image.TryFindBase(config.Address()).value(); + ASSERT(base.level == 0); + const ImageViewInfo view_info(config, base.layer); + const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); + ImageViewBase& image_view = slot_image_views[image_view_id]; + image_view.flags |= ImageViewFlagBits::Strong; + image.flags |= ImageFlagBits::Strong; + return image_view_id; +} + +template +ImageId TextureCache

::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { + return image_id; + } + return InsertImage(info, gpu_addr, options); +} + +template +ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); + if (!cpu_addr) { + return ImageId{}; + } + } + const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool native_bgr = runtime.HasNativeBgr(); + ImageId image_id; + const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (True(existing_image.flags & ImageFlagBits::Remapped)) { + return false; + } + if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { + const bool strict_size = False(options & RelaxedOptions::Size) && + True(existing_image.flags & ImageFlagBits::Strong); + const ImageInfo& existing = existing_image.info; + if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && + existing.pitch == info.pitch && + IsPitchLinearSameSize(existing, info, strict_size) && + IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { + image_id = existing_image_id; + return true; + } + } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, + native_bgr)) { + image_id = existing_image_id; + return true; + } + return false; + }; + ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); + return image_id; +} + +template +ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + const auto size = CalculateGuestSizeInBytes(info); + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); + if (!cpu_addr) { + const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; + virtual_invalid_space += Common::AlignUp(size, 32); + cpu_addr = std::optional(fake_addr); + } + } + ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); + const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); + const Image& image = slot_images[image_id]; + // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different + const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); + if (is_new) { + it->second = slot_image_allocs.insert(); + } + slot_image_allocs[it->second].images.push_back(image_id); + return image_id; +} + +template +ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { + ImageInfo new_info = info; + const size_t size_bytes = CalculateGuestSizeInBytes(new_info); + const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool native_bgr = runtime.HasNativeBgr(); + std::vector overlap_ids; + std::unordered_set overlaps_found; + std::vector left_aliased_ids; + std::vector right_aliased_ids; + std::unordered_set ignore_textures; + std::vector bad_overlap_ids; + const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + return; + } + if (info.type == ImageType::Linear) { + if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { + // Alias linear images with the same pitch + left_aliased_ids.push_back(overlap_id); + } + return; + } + overlaps_found.insert(overlap_id); + static constexpr bool strict_size = true; + const std::optional solution = ResolveOverlap( + new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); + if (solution) { + gpu_addr = solution->gpu_addr; + cpu_addr = solution->cpu_addr; + new_info.resources = solution->resources; + overlap_ids.push_back(overlap_id); + return; + } + static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; + const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); + if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { + left_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, + broken_views, native_bgr)) { + right_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else { + bad_overlap_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::BadOverlap; + } + }; + ForEachImageInRegion(cpu_addr, size_bytes, region_check); + const auto region_check_gpu = [&](ImageId 
overlap_id, ImageBase& overlap) { + if (!overlaps_found.contains(overlap_id)) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + } + if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { + ignore_textures.insert(overlap_id); + } + } + }; + ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); + const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); + Image& new_image = slot_images[new_image_id]; + + if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { + new_image.flags |= ImageFlagBits::Sparse; + } + + for (const ImageId overlap_id : ignore_textures) { + Image& overlap = slot_images[overlap_id]; + if (True(overlap.flags & ImageFlagBits::GpuModified)) { + UNIMPLEMENTED(); + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + + // TODO: Only upload what we need + RefreshContents(new_image, new_image_id); + + for (const ImageId overlap_id : overlap_ids) { + Image& overlap = slot_images[overlap_id]; + if (overlap.info.num_samples != new_image.info.num_samples) { + LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); + } else { + const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); + const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); + runtime.CopyImage(new_image, overlap, copies); + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + ImageBase& new_image_base = new_image; + for (const ImageId aliased_id : right_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : 
left_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : bad_overlap_ids) { + ImageBase& aliased = slot_images[aliased_id]; + aliased.overlapping_images.push_back(new_image_id); + new_image.overlapping_images.push_back(aliased_id); + new_image.flags |= ImageFlagBits::BadOverlap; + } + RegisterImage(new_image_id); + return new_image_id; +} + +template +typename TextureCache

::BlitImages TextureCache

::GetBlitImages( + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { + static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; + const GPUVAddr dst_addr = dst.Address(); + const GPUVAddr src_addr = src.Address(); + ImageInfo dst_info(dst); + ImageInfo src_info(src); + ImageId dst_id; + ImageId src_id; + do { + has_deleted_images = false; + dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); + src_id = FindImage(src_info, src_addr, FIND_OPTIONS); + const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; + const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; + DeduceBlitImages(dst_info, src_info, dst_image, src_image); + if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { + continue; + } + if (!dst_id) { + dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); + } + if (!src_id) { + src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); + } + } while (has_deleted_images); + return BlitImages{ + .dst_id = dst_id, + .src_id = src_id, + .dst_format = dst_info.format, + .src_format = src_info.format, + }; +} + +template +SamplerId TextureCache

::FindSampler(const TSCEntry& config) { + if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { + return NULL_SAMPLER_ID; + } + const auto [pair, is_new] = samplers.try_emplace(config); + if (is_new) { + pair->second = slot_samplers.insert(runtime, config); + } + return pair->second; +} + +template +ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { + const auto& regs = maxwell3d.regs; + if (index >= regs.rt_control.count) { + return ImageViewId{}; + } + const auto& rt = regs.rt[index]; + const GPUVAddr gpu_addr = rt.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + if (rt.format == Tegra::RenderTargetFormat::NONE) { + return ImageViewId{}; + } + const ImageInfo info(regs, index); + return FindRenderTargetView(info, gpu_addr, is_clear); +} + +template +ImageViewId TextureCache

::FindDepthBuffer(bool is_clear) { + const auto& regs = maxwell3d.regs; + if (!regs.zeta_enable) { + return ImageViewId{}; + } + const GPUVAddr gpu_addr = regs.zeta.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + const ImageInfo info(regs); + return FindRenderTargetView(info, gpu_addr, is_clear); +} + +template +ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear) { + const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; + const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + Image& image = slot_images[image_id]; + const ImageViewType view_type = RenderTargetImageViewType(info); + SubresourceBase base; + if (image.info.type == ImageType::Linear) { + base = SubresourceBase{.level = 0, .layer = 0}; + } else { + base = image.TryFindBase(gpu_addr).value(); + } + const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; + const SubresourceRange range{ + .base = base, + .extent = {.levels = 1, .layers = layers}, + }; + return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); +} + +template +template +void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + boost::container::small_vector maps; + ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { + const auto it = page_table.find(page); + if (it == page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageMapId map_id : it->second) { + ImageMapView& map = slot_map_views[map_id]; + if (map.picked) { + continue; + } + if (!map.Overlaps(cpu_addr, size)) { + continue; + } + map.picked = true; + maps.push_back(map_id); + Image& image = slot_images[map.image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(map.image_id); + if constexpr (BOOL_BREAK) { + if (func(map.image_id, image)) { + return true; + } + } else { + func(map.image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } + for (const ImageMapId map_id : maps) { + slot_map_views[map_id].picked = false; + } +} + +template +template +void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = gpu_page_table.find(page); + if (it == gpu_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} + +template +template +void TextureCache

::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = sparse_page_table.find(page); + if (it == sparse_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} + +template +template +void TextureCache

::ForEachSparseSegment(ImageBase& image, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool RETURNS_BOOL = std::is_same_v; + const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); + for (auto& segment : segments) { + const auto gpu_addr = segment.first; + const auto size = segment.second; + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + if constexpr (RETURNS_BOOL) { + if (func(gpu_addr, *cpu_addr, size)) { + break; + } + } else { + func(gpu_addr, *cpu_addr, size); + } + } +} + +template +ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { + Image& image = slot_images[image_id]; + if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { + return image_view_id; + } + const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); + image.InsertView(info, image_view_id); + return image_view_id; +} + +template +void TextureCache

::RegisterImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), + "Trying to register an already registered image"); + image.flags |= ImageFlagBits::Registered; + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory += Common::AlignUp(tentative_size, 1024); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + auto map_id = + slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + image.map_view_id = map_id; + return; + } + std::vector sparse_maps{}; + ForEachSparseSegment( + image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); + ForEachCPUPage(cpu_addr, size, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + sparse_maps.push_back(map_id); + }); + sparse_views.emplace(image_id, std::move(sparse_maps)); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); +} + +template +void TextureCache

::UnregisterImage(ImageId image_id) { + Image& image = slot_images[image_id]; + ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), + "Trying to unregister an already registered image"); + image.flags &= ~ImageFlagBits::Registered; + image.flags &= ~ImageFlagBits::BadOverlap; + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); + const auto& clear_page_table = + [this, image_id]( + u64 page, + std::unordered_map, IdentityHash>& selected_page_table) { + const auto page_it = selected_page_table.find(page); + if (page_it == selected_page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_ids.erase(vector_it); + }; + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + const auto map_id = image.map_view_id; + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_map_ids = page_it->second; + const auto vector_it = std::ranges::find(image_map_ids, map_id); + if (vector_it == image_map_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", 
+ page << PAGE_BITS); + return; + } + image_map_ids.erase(vector_it); + }); + slot_map_views.erase(map_id); + return; + } + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { + clear_page_table(page, sparse_page_table); + }); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map_range = slot_map_views[map_view_id]; + const VAddr cpu_addr = map_range.cpu_addr; + const std::size_t size = map_range.size; + ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_map_ids = page_it->second; + auto vector_it = image_map_ids.begin(); + while (vector_it != image_map_ids.end()) { + ImageMapView& map = slot_map_views[*vector_it]; + if (map.image_id != image_id) { + vector_it++; + continue; + } + if (!map.picked) { + map.picked = true; + } + vector_it = image_map_ids.erase(vector_it); + } + }); + slot_map_views.erase(map_view_id); + } + sparse_views.erase(it); +} + +template +void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { + ASSERT(False(image.flags & ImageFlagBits::Tracked)); + image.flags |= ImageFlagBits::Tracked; + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + return; + } + if (True(image.flags & ImageFlagBits::Registered)) { + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + } + return; + } + ForEachSparseSegment(image, + [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + }); +} + +template +void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { + ASSERT(True(image.flags & ImageFlagBits::Tracked)); + image.flags &= ~ImageFlagBits::Tracked; + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + return; + } + ASSERT(True(image.flags & ImageFlagBits::Registered)); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + } +} + +template +void TextureCache

::DeleteImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + const GPUVAddr gpu_addr = image.gpu_addr; + const auto alloc_it = image_allocs_table.find(gpu_addr); + if (alloc_it == image_allocs_table.end()) { + UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", + gpu_addr); + return; + } + const ImageAllocId alloc_id = alloc_it->second; + std::vector& alloc_images = slot_image_allocs[alloc_id].images; + const auto alloc_image_it = std::ranges::find(alloc_images, image_id); + if (alloc_image_it == alloc_images.end()) { + UNREACHABLE_MSG("Trying to delete an image that does not exist"); + return; + } + ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); + + // Mark render targets as dirty + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + const std::span image_view_ids = image.image_view_ids; + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; + } + } + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); + + for (const AliasedImage& alias : image.aliased_images) { + ImageBase& other_image = slot_images[alias.id]; + [[maybe_unused]] const size_t num_removed_aliases = + std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { + return other_alias.id == image_id; + }); + other_image.CheckAliasState(); + ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", + num_removed_aliases); + } + for (const ImageId overlap_id : image.overlapping_images) { + ImageBase& 
other_image = slot_images[overlap_id]; + [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( + other_image.overlapping_images, + [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); + other_image.CheckBadOverlapState(); + ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", + num_removed_overlaps); + } + for (const ImageViewId image_view_id : image_view_ids) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + slot_image_views.erase(image_view_id); + } + sentenced_images.Push(std::move(slot_images[image_id])); + slot_images.erase(image_id); + + alloc_images.erase(alloc_image_it); + if (alloc_images.empty()) { + image_allocs_table.erase(alloc_it); + } + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + } + graphics_image_table.Invalidate(); + compute_image_table.Invalidate(); + has_deleted_images = true; +} + +template +void TextureCache

::RemoveImageViewReferences(std::span removed_views) { + auto it = image_views.begin(); + while (it != image_views.end()) { + const auto found = std::ranges::find(removed_views, it->second); + if (found != removed_views.end()) { + it = image_views.erase(it); + } else { + ++it; + } + } +} + +template +void TextureCache

::RemoveFramebuffers(std::span removed_views) { + auto it = framebuffers.begin(); + while (it != framebuffers.end()) { + if (it->first.Contains(removed_views)) { + it = framebuffers.erase(it); + } else { + ++it; + } + } +} + +template +void TextureCache

::MarkModification(ImageBase& image) noexcept { + image.flags |= ImageFlagBits::GpuModified; + image.modification_tick = ++modification_tick; +} + +template +void TextureCache

::SynchronizeAliases(ImageId image_id) { + boost::container::small_vector aliased_images; + ImageBase& image = slot_images[image_id]; + u64 most_recent_tick = image.modification_tick; + for (const AliasedImage& aliased : image.aliased_images) { + ImageBase& aliased_image = slot_images[aliased.id]; + if (image.modification_tick < aliased_image.modification_tick) { + most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); + aliased_images.push_back(&aliased); + } + } + if (aliased_images.empty()) { + return; + } + image.modification_tick = most_recent_tick; + std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { + const ImageBase& lhs_image = slot_images[lhs->id]; + const ImageBase& rhs_image = slot_images[rhs->id]; + return lhs_image.modification_tick < rhs_image.modification_tick; + }); + for (const AliasedImage* const aliased : aliased_images) { + CopyImage(image_id, aliased->id, aliased->copies); + } +} + +template +void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { + Image& image = slot_images[image_id]; + if (invalidate) { + image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); + if (False(image.flags & ImageFlagBits::Tracked)) { + TrackImage(image, image_id); + } + } else { + RefreshContents(image, image_id); + SynchronizeAliases(image_id); + } + if (is_modification) { + MarkModification(image); + } + image.frame_tick = frame_tick; +} + +template +void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modification, + bool invalidate) { + if (!image_view_id) { + return; + } + const ImageViewBase& image_view = slot_image_views[image_view_id]; + if (image_view.IsBuffer()) { + return; + } + PrepareImage(image_view.image_id, is_modification, invalidate); +} + +template +void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::span copies) { + Image& dst = slot_images[dst_id]; + Image& src = slot_images[src_id]; + const auto dst_format_type = GetFormatType(dst.info.format); + const auto src_format_type = GetFormatType(src.info.format); + if (src_format_type == dst_format_type) { + if constexpr (HAS_EMULATED_COPIES) { + if (!runtime.CanImageBeCopied(dst, src)) { + return runtime.EmulateCopyImage(dst, src, copies); + } + } + return runtime.CopyImage(dst, src, copies); + } + UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); + UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); + for (const ImageCopy& copy : copies) { + UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); + UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); + + const SubresourceBase dst_base{ + .level = copy.dst_subresource.base_level, + .layer = copy.dst_subresource.base_layer, + }; + const SubresourceBase src_base{ + .level = copy.src_subresource.base_level, + .layer = copy.src_subresource.base_layer, + }; + const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; + const SubresourceExtent src_extent{.levels = 1, .layers = 1}; + const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; + const SubresourceRange src_range{.base = src_base, .extent = src_extent}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); + const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + [[maybe_unused]] const Extent3D expected_size{ + .width = 
std::min(dst_view.size.width, src_view.size.width), + .height = std::min(dst_view.size.height, src_view.size.height), + .depth = std::min(dst_view.size.depth, src_view.size.depth), + }; + UNIMPLEMENTED_IF(copy.extent != expected_size); + + runtime.ConvertImage(dst_framebuffer, dst_view, src_view); + } +} + +template +void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { + if (*old_id == new_id) { + return; + } + if (*old_id) { + const ImageViewBase& old_view = slot_image_views[*old_id]; + if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { + uncommitted_downloads.push_back(old_view.image_id); + } + } + *old_id = new_id; +} + +template +std::pair TextureCache

::RenderTargetFromImage( + ImageId image_id, const ImageViewInfo& view_info) { + const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); + const ImageBase& image = slot_images[image_id]; + const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; + const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; + const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; + const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + const u32 num_samples = image.info.num_samples; + const auto [samples_x, samples_y] = SamplesLog2(num_samples); + const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ + .color_buffer_ids = {color_view_id}, + .depth_buffer_id = depth_view_id, + .size = {extent.width >> samples_x, extent.height >> samples_y}, + }); + return {framebuffer_id, view_id}; +} + +template +bool TextureCache

::IsFullClear(ImageViewId id) { + if (!id) { + return true; + } + const ImageViewBase& image_view = slot_image_views[id]; + const ImageBase& image = slot_images[image_view.image_id]; + const Extent3D size = image_view.size; + const auto& regs = maxwell3d.regs; + const auto& scissor = regs.scissor_test[0]; + if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { + // Images with multiple resources can't be cleared in a single call + return false; + } + if (regs.clear_flags.scissor == 0) { + // If scissor testing is disabled, the clear is always full + return true; + } + // Make sure the clear covers all texels in the subresource + return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && + scissor.max_y >= size.height; +} + +} // namespace VideoCommon -- cgit v1.2.3 From 5566f3dbc0db1de41fcd291f5b7588d9e055ba85 Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Thu, 5 Aug 2021 20:46:24 +0000 Subject: texture_cache: Address ameerj's review --- src/video_core/CMakeLists.txt | 6 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 2 +- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- src/video_core/renderer_opengl/gl_texture_cache.h | 2 +- .../renderer_opengl/gl_texture_cache_base.cpp | 10 + .../renderer_opengl/gl_texture_cache_templates.cpp | 10 - src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/renderer_vulkan/vk_texture_cache.h | 2 +- .../renderer_vulkan/vk_texture_cache_base.cpp | 10 + .../renderer_vulkan/vk_texture_cache_templates.cpp | 10 - src/video_core/texture_cache/image_view_info.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 1711 ++++++++++++++++---- src/video_core/texture_cache/texture_cache_base.h | 402 +++++ .../texture_cache/texture_cache_templates.h | 1507 ----------------- 14 files changed, 1839 insertions(+), 1839 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_texture_cache_base.cpp delete mode 100644 
src/video_core/renderer_opengl/gl_texture_cache_templates.cpp create mode 100644 src/video_core/renderer_vulkan/vk_texture_cache_base.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp create mode 100644 src/video_core/texture_cache/texture_cache_base.h delete mode 100644 src/video_core/texture_cache/texture_cache_templates.h (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1250cca6f..2f6cdd216 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -97,7 +97,7 @@ add_library(video_core STATIC renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h - renderer_opengl/gl_texture_cache_templates.cpp + renderer_opengl/gl_texture_cache_base.cpp renderer_opengl/gl_query_cache.cpp renderer_opengl/gl_query_cache.h renderer_opengl/maxwell_to_gl.h @@ -156,7 +156,7 @@ add_library(video_core STATIC renderer_vulkan/vk_swapchain.h renderer_vulkan/vk_texture_cache.cpp renderer_vulkan/vk_texture_cache.h - renderer_vulkan/vk_texture_cache_templates.cpp + renderer_vulkan/vk_texture_cache_base.cpp renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h shader_cache.cpp @@ -188,7 +188,7 @@ add_library(video_core STATIC texture_cache/samples_helper.h texture_cache/slot_vector.h texture_cache/texture_cache.h - texture_cache/texture_cache_templates.h + texture_cache/texture_cache_base.h texture_cache/types.h texture_cache/util.cpp texture_cache/util.h diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index fac0034fb..bccb37a58 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -15,7 +15,7 @@ #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include 
"video_core/shader_notify.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" #if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 41d2b73f4..b909c387e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -32,7 +32,7 @@ #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/shader_cache.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 921072ebe..4a4f6301c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -12,7 +12,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/util_shaders.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/gl_texture_cache_base.cpp b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp new file mode 100644 index 000000000..385358fea --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace VideoCommon { +template class VideoCommon::TextureCache; +} diff --git a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp b/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp deleted file mode 100644 index 00ed06447..000000000 --- a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/texture_cache/texture_cache_templates.h" - -namespace VideoCommon { -template class VideoCommon::TextureCache; -} diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 23cef2996..3ac18ea54 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -32,7 +32,7 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 0b73d55f8..5fe6b7ba3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -9,7 +9,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" #include 
"video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" diff --git a/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp new file mode 100644 index 000000000..44e688342 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace VideoCommon { +template class VideoCommon::TextureCache; +} diff --git a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp b/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp deleted file mode 100644 index fd8978954..000000000 --- a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/texture_cache/texture_cache_templates.h" - -namespace VideoCommon { -template class VideoCommon::TextureCache; -} diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp index faf5b151f..f14a92565 100644 --- a/src/video_core/texture_cache/image_view_info.cpp +++ b/src/video_core/texture_cache/image_view_info.cpp @@ -6,7 +6,7 @@ #include "common/assert.h" #include "video_core/texture_cache/image_view_info.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a4f6e9422..5884fa16e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,48 +4,7 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/common_types.h" -#include "common/literals.h" -#include "common/logging/log.h" -#include "common/settings.h" -#include "video_core/compatible_formats.h" -#include "video_core/delayed_destruction_ring.h" -#include "video_core/dirty_flags.h" -#include "video_core/engines/fermi_2d.h" -#include "video_core/engines/kepler_compute.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/rasterizer_interface.h" -#include "video_core/surface.h" -#include "video_core/texture_cache/descriptor_table.h" -#include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/formatter.h" -#include "video_core/texture_cache/image_base.h" -#include "video_core/texture_cache/image_info.h" -#include 
"video_core/texture_cache/image_view_base.h" -#include "video_core/texture_cache/image_view_info.h" -#include "video_core/texture_cache/render_targets.h" -#include "video_core/texture_cache/samples_helper.h" -#include "video_core/texture_cache/slot_vector.h" -#include "video_core/texture_cache/types.h" -#include "video_core/texture_cache/util.h" -#include "video_core/textures/texture.h" +#include "video_core/texture_cache/texture_cache_base.h" namespace VideoCommon { @@ -62,341 +21,1487 @@ using VideoCore::Surface::SurfaceType; using namespace Common::Literals; template -class TextureCache { - /// Address shift for caching images into a hash table - static constexpr u64 PAGE_BITS = 20; - - /// Enables debugging features to the texture cache - static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; - /// Implement blits as copies between framebuffers - static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; - /// True when some copies have to be emulated - static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; - /// True when the API can provide info about the memory of the device. - static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; - - /// Image view ID for null descriptors - static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; - /// Sampler ID for bugged sampler ids - static constexpr SamplerId NULL_SAMPLER_ID{0}; - - static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; - static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; - - using Runtime = typename P::Runtime; - using Image = typename P::Image; - using ImageAlloc = typename P::ImageAlloc; - using ImageView = typename P::ImageView; - using Sampler = typename P::Sampler; - using Framebuffer = typename P::Framebuffer; - - struct BlitImages { - ImageId dst_id; - ImageId src_id; - PixelFormat dst_format; - PixelFormat src_format; - }; +TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_) + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { + // Configure null sampler + TSCEntry sampler_descriptor{}; + sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); + sampler_descriptor.cubemap_anisotropy.Assign(1); + + // Make sure the first index is reserved for the null resources + // This way the null resource becomes a compile time constant + void(slot_image_views.insert(runtime, NullImageParams{})); + void(slot_samplers.insert(runtime, sampler_descriptor)); + + deletion_iterator = slot_images.begin(); + + if constexpr (HAS_DEVICE_MEMORY_INFO) { + const auto device_memory = runtime.GetDeviceLocalMemory(); + const u64 possible_expected_memory = (device_memory * 3) / 10; + const u64 possible_critical_memory = (device_memory * 6) / 10; + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + minimum_memory = 0; + } else { + // on OGL we can be more conservatives as the driver takes care. + expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; + critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; + minimum_memory = expected_memory; + } +} - template - struct IdentityHash { - [[nodiscard]] size_t operator()(T value) const noexcept { - return static_cast(value); +template +void TextureCache

::RunGarbageCollector() { + const bool high_priority_mode = total_used_memory >= expected_memory; + const bool aggressive_mode = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; + int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); + for (; num_iterations > 0; --num_iterations) { + if (deletion_iterator == slot_images.end()) { + deletion_iterator = slot_images.begin(); + if (deletion_iterator == slot_images.end()) { + break; + } } - }; - -public: - explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, - Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); - - /// Notify the cache that a new frame has been queued - void TickFrame(); - - /// Return a constant reference to the given image view id - [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; - - /// Return a reference to the given image view id - [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; - - /// Mark an image as modified from the GPU - void MarkModification(ImageId id) noexcept; - - /// Fill image_view_ids with the graphics images in indices - void FillGraphicsImageViews(std::span indices, - std::span image_view_ids); + auto [image_id, image_tmp] = *deletion_iterator; + Image* image = image_tmp; // fix clang error. + const bool is_alias = True(image->flags & ImageFlagBits::Alias); + const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); + const bool must_download = image->IsSafeDownload(); + bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); + const u64 ticks_needed = + is_bad_overlap + ? ticks_to_destroy >> 4 + : ((should_care && aggressive_mode) ? 
ticks_to_destroy >> 1 : ticks_to_destroy); + should_care |= aggressive_mode; + if (should_care && image->frame_tick + ticks_needed < frame_tick) { + if (is_bad_overlap) { + const bool overlap_check = std::ranges::all_of( + image->overlapping_images, [&, image](const ImageId& overlap_id) { + auto& overlap = slot_images[overlap_id]; + return overlap.frame_tick >= image->frame_tick; + }); + if (!overlap_check) { + ++deletion_iterator; + continue; + } + } + if (!is_bad_overlap && must_download) { + const bool alias_check = std::ranges::none_of( + image->aliased_images, [&, image](const AliasedImage& alias) { + auto& alias_image = slot_images[alias.id]; + return (alias_image.frame_tick < image->frame_tick) || + (alias_image.modification_tick < image->modification_tick); + }); + + if (alias_check) { + auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image->info); + image->DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); + } + } + if (True(image->flags & ImageFlagBits::Tracked)) { + UntrackImage(*image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id); + if (is_bad_overlap) { + ++num_iterations; + } + } + ++deletion_iterator; + } +} - /// Fill image_view_ids with the compute images in indices - void FillComputeImageViews(std::span indices, std::span image_view_ids); +template +void TextureCache

::TickFrame() { + if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { + RunGarbageCollector(); + } + sentenced_images.Tick(); + sentenced_framebuffers.Tick(); + sentenced_image_view.Tick(); + ++frame_tick; +} - /// Get the sampler from the graphics descriptor table in the specified index - Sampler* GetGraphicsSampler(u32 index); +template +const typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) const noexcept { + return slot_image_views[id]; +} - /// Get the sampler from the compute descriptor table in the specified index - Sampler* GetComputeSampler(u32 index); +template +typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { + return slot_image_views[id]; +} - /// Refresh the state for graphics image view and sampler descriptors - void SynchronizeGraphicsDescriptors(); +template +void TextureCache

::MarkModification(ImageId id) noexcept { + MarkModification(slot_images[id]); +} - /// Refresh the state for compute image view and sampler descriptors - void SynchronizeComputeDescriptors(); +template +void TextureCache

::FillGraphicsImageViews(std::span indices, + std::span image_view_ids) { + FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); +} - /// Update bound render targets and upload memory if necessary - /// @param is_clear True when the render targets are being used for clears - void UpdateRenderTargets(bool is_clear); +template +void TextureCache

::FillComputeImageViews(std::span indices, + std::span image_view_ids) { + FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); +} - /// Find a framebuffer with the currently bound render targets - /// UpdateRenderTargets should be called before this - Framebuffer* GetFramebuffer(); +template +typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { + if (index > graphics_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = graphics_sampler_table.Read(index); + SamplerId& id = graphics_sampler_ids[index]; + if (is_new) { + id = FindSampler(descriptor); + } + return &slot_samplers[id]; +} - /// Mark images in a range as modified from the CPU - void WriteMemory(VAddr cpu_addr, size_t size); +template +typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { + if (index > compute_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = compute_sampler_table.Read(index); + SamplerId& id = compute_sampler_ids[index]; + if (is_new) { + id = FindSampler(descriptor); + } + return &slot_samplers[id]; +} - /// Download contents of host images to guest memory in a region - void DownloadMemory(VAddr cpu_addr, size_t size); +template +void TextureCache

::SynchronizeGraphicsDescriptors() { + using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; + const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; + const u32 tic_limit = maxwell3d.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; + if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { + graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + } + if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { + graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} - /// Remove images in a region - void UnmapMemory(VAddr cpu_addr, size_t size); +template +void TextureCache

::SynchronizeComputeDescriptors() { + const bool linked_tsc = kepler_compute.launch_description.linked_tsc; + const u32 tic_limit = kepler_compute.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; + const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); + if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { + compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + } + if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { + compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} - /// Remove images in a region - void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); +template +void TextureCache

::UpdateRenderTargets(bool is_clear) { + using namespace VideoCommon::Dirty; + auto& flags = maxwell3d.dirty.flags; + if (!flags[Dirty::RenderTargets]) { + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); + return; + } + flags[Dirty::RenderTargets] = false; - /// Blit an image with the given parameters - void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy); + // Render target control is used on all render targets, so force look ups when this one is up + const bool force = flags[Dirty::RenderTargetControl]; + flags[Dirty::RenderTargetControl] = false; - /// Try to find a cached image view in the given CPU address - [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + if (flags[Dirty::ColorBuffer0 + index] || force) { + flags[Dirty::ColorBuffer0 + index] = false; + BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + } + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + if (flags[Dirty::ZetaBuffer] || force) { + flags[Dirty::ZetaBuffer] = false; + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - /// Return true when there are uncommitted images to be downloaded - [[nodiscard]] bool HasUncommittedFlushes() const noexcept; + for (size_t index = 0; index < NUM_RT; ++index) { 
+ render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); + } + render_targets.size = Extent2D{ + maxwell3d.regs.render_area.width, + maxwell3d.regs.render_area.height, + }; +} - /// Return true when the caller should wait for async downloads - [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; +template +typename P::Framebuffer* TextureCache

::GetFramebuffer() { + return &slot_framebuffers[GetFramebufferId(render_targets)]; +} - /// Commit asynchronous downloads - void CommitAsyncFlushes(); +template +void TextureCache

::FillImageViews(DescriptorTable& table, + std::span cached_image_view_ids, + std::span indices, + std::span image_view_ids) { + ASSERT(indices.size() <= image_view_ids.size()); + do { + has_deleted_images = false; + std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { + return VisitImageView(table, cached_image_view_ids, index); + }); + } while (has_deleted_images); +} - /// Pop asynchronous downloads - void PopAsyncFlushes(); +template +ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, + std::span cached_image_view_ids, + u32 index) { + if (index > table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); + return NULL_IMAGE_VIEW_ID; + } + const auto [descriptor, is_new] = table.Read(index); + ImageViewId& image_view_id = cached_image_view_ids[index]; + if (is_new) { + image_view_id = FindImageView(descriptor); + } + if (image_view_id != NULL_IMAGE_VIEW_ID) { + PrepareImageView(image_view_id, false, false); + } + return image_view_id; +} - /// Return true when a CPU region is modified from the GPU - [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); +template +FramebufferId TextureCache

::GetFramebufferId(const RenderTargets& key) { + const auto [pair, is_new] = framebuffers.try_emplace(key); + FramebufferId& framebuffer_id = pair->second; + if (!is_new) { + return framebuffer_id; + } + std::array color_buffers; + std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), + [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); + ImageView* const depth_buffer = + key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; + framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); + return framebuffer_id; +} - std::mutex mutex; +template +void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { + ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { + if (True(image.flags & ImageFlagBits::CpuModified)) { + return; + } + image.flags |= ImageFlagBits::CpuModified; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + }); +} -private: - /// Iterate over all page indices in a range - template - static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { - static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; - const u64 page_end = (addr + size - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { - if constexpr (RETURNS_BOOL) { - if (func(page)) { - break; - } - } else { - func(page); - } +template +void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { + std::vector images; + ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { + if (!image.IsSafeDownload()) { + return; } + image.flags &= ~ImageFlagBits::GpuModified; + images.push_back(image_id); + }); + if (images.empty()) { + return; } + std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { + return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; + }); + for (const ImageId image_id : images) { + Image& image = slot_images[image_id]; + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + } +} - template - static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { - static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; - const u64 page_end = (addr + size - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { - if constexpr (RETURNS_BOOL) { - if (func(page)) { - break; - } - } else { - func(page); - } +template +void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { + std::vector deleted_images; + ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); } + UnregisterImage(id); + DeleteImage(id); } +} - /// Runs the Garbage Collector. - void RunGarbageCollector(); - - /// Fills image_view_ids in the image views in indices - void FillImageViews(DescriptorTable& table, - std::span cached_image_view_ids, std::span indices, - std::span image_view_ids); - - /// Find or create an image view in the guest descriptor table - ImageViewId VisitImageView(DescriptorTable& table, - std::span cached_image_view_ids, u32 index); - - /// Find or create a framebuffer with the given render target parameters - FramebufferId GetFramebufferId(const RenderTargets& key); +template +void TextureCache

::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { + std::vector deleted_images; + ForEachImageInRegionGPU(gpu_addr, size, + [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Remapped)) { + continue; + } + image.flags |= ImageFlagBits::Remapped; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + } +} - /// Refresh the contents (pixel data) of an image - void RefreshContents(Image& image, ImageId image_id); +template +void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy) { /* Performs a Fermi2D blit: resolves the destination and source images, builds a one-level, one-layer 2D view on each, shifts the copy rectangle right by each image's SamplesLog2 result, then calls the runtime (framebuffer blit when FRAMEBUFFER_BLITS is set, image blit otherwise). The source is prepared as a read, the destination as a modification. */ + const BlitImages images = GetBlitImages(dst, src); + const ImageId dst_id = images.dst_id; + const ImageId src_id = images.src_id; + PrepareImage(src_id, false, false); + PrepareImage(dst_id, true, false); + + ImageBase& dst_image = slot_images[dst_id]; + const ImageBase& src_image = slot_images[src_id]; + + // TODO: Deduplicate + const std::optional src_base = src_image.TryFindBase(src.Address()); + const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; + const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); + const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); + const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); + const Region2D src_region{ + Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, + Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, + }; - /// Upload data from guest to an image - template - void UploadImageContents(Image& image, StagingBuffer& staging_buffer); + const std::optional dst_base = dst_image.TryFindBase(dst.Address()); + const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); + const Region2D dst_region{ + Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, + Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, + }; - /// Find or create an image view from a guest descriptor - [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); + // Always call this 
after src_framebuffer_id was queried, as the address might be invalidated. + Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + if constexpr (FRAMEBUFFER_BLITS) { + // OpenGL blits from framebuffers, not images + Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; + runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, + copy.filter, copy.operation); + } else { + // Vulkan can blit images, but it lacks format reinterpretations + // Provide a framebuffer in case it's necessary + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, + copy.operation); + } +} - /// Create a new image view from a guest descriptor - [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); +template +typename P::ImageView* TextureCache

::TryFindFramebufferImageView(VAddr cpu_addr) { /* Looks up the page-table entry for cpu_addr and returns a pointer to the first image view of the first mapped image whose cpu_addr matches exactly and that owns at least one view. Returns nullptr when the page is unknown or no such image exists. */ + // TODO: Properly implement this + const auto it = page_table.find(cpu_addr >> PAGE_BITS); + if (it == page_table.end()) { + return nullptr; + } + const auto& image_map_ids = it->second; + for (const ImageMapId map_id : image_map_ids) { + const ImageMapView& map = slot_map_views[map_id]; + const ImageBase& image = slot_images[map.image_id]; + if (image.cpu_addr != cpu_addr) { + continue; + } + if (image.image_view_ids.empty()) { + continue; + } + return &slot_image_views[image.image_view_ids.at(0)]; + } + return nullptr; +} - /// Find or create an image from the given parameters - [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options = RelaxedOptions{}); +template +bool TextureCache

::HasUncommittedFlushes() const noexcept { /* True when uncommitted_downloads holds at least one entry. */ + return !uncommitted_downloads.empty(); +} - /// Find an image from the given parameters - [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options); +template +bool TextureCache

::ShouldWaitAsyncFlushes() const noexcept { /* True when a committed batch exists and the oldest one (the queue front) is non-empty. */ + return !committed_downloads.empty() && !committed_downloads.front().empty(); +} - /// Create an image from the given parameters - [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options); +template +void TextureCache

::CommitAsyncFlushes() { /* Publishes the pending download list as a new committed batch: the list is pushed by copy and then cleared, so an empty batch is pushed when nothing was pending. */ + // This is intentionally passing the value by copy + committed_downloads.push(uncommitted_downloads); + uncommitted_downloads.clear(); +} - /// Create a new image and join perfectly matching existing images - /// Remove joined images from the cache - [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); +template +void TextureCache

::PopAsyncFlushes() { /* Processes the oldest committed batch. Does nothing when the queue is empty; an empty batch is just popped. Otherwise all images of the batch are downloaded back-to-back into one staging buffer sized to the sum of their unswizzled sizes, the runtime is asked to finish, and each image's slice is passed to SwizzleImage together with gpu_memory and the image's gpu_addr (presumably writing it back to guest memory, confirm against SwizzleImage). */ + if (committed_downloads.empty()) { + return; + } + const std::span download_ids = committed_downloads.front(); + if (download_ids.empty()) { + committed_downloads.pop(); + return; + } + size_t total_size_bytes = 0; + for (const ImageId image_id : download_ids) { + total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + } + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); + const size_t original_offset = download_map.offset; + for (const ImageId image_id : download_ids) { + Image& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, copies); + download_map.offset += image.unswizzled_size_bytes; + } + // Wait for downloads to finish + runtime.Finish(); + + download_map.offset = original_offset; + std::span download_span = download_map.mapped_span; + for (const ImageId image_id : download_ids) { + const ImageBase& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); + download_map.offset += image.unswizzled_size_bytes; + download_span = download_span.subspan(image.unswizzled_size_bytes); + } + committed_downloads.pop(); +} - /// Return a blit image pair from the given guest blit parameters - [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src); +template +bool TextureCache

::IsRegionGpuModified(VAddr addr, size_t size) { /* Returns true if any image visited by ForEachImageInRegion for this range has the GpuModified flag set. The callback returns true on the first hit, which ForEachImageInRegion treats as a request to stop. */ + bool is_modified = false; + ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { + if (False(image.flags & ImageFlagBits::GpuModified)) { + return false; + } + is_modified = true; + return true; + }); + return is_modified; +} - /// Find or create a sampler from a guest descriptor sampler - [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); +template +void TextureCache

::RefreshContents(Image& image, ImageId image_id) { /* Re-uploads the image only when its CpuModified flag is set: the flag is cleared, the image is tracked, and the contents are uploaded through a staging buffer followed by an upload memory barrier. Images with more than one sample are skipped with a warning, after the flag was already cleared and tracking started. */ + if (False(image.flags & ImageFlagBits::CpuModified)) { + // Only upload modified images + return; + } + image.flags &= ~ImageFlagBits::CpuModified; + TrackImage(image, image_id); - /// Find or create an image view for the given color buffer index - [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); + if (image.info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); + return; + } + auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); + UploadImageContents(image, staging); + runtime.InsertUploadMemoryBarrier(); +} - /// Find or create an image view for the depth buffer - [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); +template +template +void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) { /* Fills the staging buffer and uploads it through one of three paths selected by image flags. AcceleratedUpload: raw guest bytes are read into the staging span and the runtime performs the upload. Converted: the image is unswizzled into a temporary vector and ConvertImage writes into the staging span. Default: the image is unswizzled directly into the staging span. */ + const std::span mapped_span = staging.mapped_span; + const GPUVAddr gpu_addr = image.gpu_addr; + + if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { + gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + const auto uploads = FullUploadSwizzles(image.info); + runtime.AccelerateImageUpload(image, staging, uploads); + } else if (True(image.flags & ImageFlagBits::Converted)) { + std::vector unswizzled_data(image.unswizzled_size_bytes); + auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); + ConvertImage(unswizzled_data, image.info, mapped_span, copies); + image.UploadMemory(staging, copies); + } else { + const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); + image.UploadMemory(staging, copies); + } +} - /// Find or create a view for a render target with the given image parameters - [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear); +template +ImageViewId TextureCache

::FindImageView(const TICEntry& config) { /* Returns the cached view id for a TIC entry, creating the view the first time the entry is seen. Entries rejected by IsValidEntry yield NULL_IMAGE_VIEW_ID and are not cached. */ + if (!IsValidEntry(gpu_memory, config)) { + return NULL_IMAGE_VIEW_ID; + } + const auto [pair, is_new] = image_views.try_emplace(config); + ImageViewId& image_view_id = pair->second; + if (is_new) { + image_view_id = CreateImageView(config); + } + return image_view_id; +} - /// Iterates over all the images in a region calling func - template - void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); +template +ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { /* Builds a view for a TIC entry. Buffer-type entries get a view inserted directly at the entry address. Otherwise the backing image is found or inserted at the entry address minus BaseLayer() * layer_stride, the view is found or created on it, and both the view and the image are flagged Strong. Returns NULL_IMAGE_VIEW_ID when no image could be found or inserted. */ + const ImageInfo info(config); + if (info.type == ImageType::Buffer) { + const ImageViewInfo view_info(config, 0); + return slot_image_views.insert(runtime, info, view_info, config.Address()); + } + const u32 layer_offset = config.BaseLayer() * info.layer_stride; + const GPUVAddr image_gpu_addr = config.Address() - layer_offset; + const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + ImageBase& image = slot_images[image_id]; + const SubresourceBase base = image.TryFindBase(config.Address()).value(); + ASSERT(base.level == 0); + const ImageViewInfo view_info(config, base.layer); + const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); + ImageViewBase& image_view = slot_image_views[image_view_id]; + image_view.flags |= ImageViewFlagBits::Strong; + image.flags |= ImageFlagBits::Strong; + return image_view_id; +} - template - void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); +template +ImageId TextureCache

::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { /* Returns the id FindImage yields when it is valid; otherwise inserts a new image with the same arguments and returns its id. */ + if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { + return image_id; + } + return InsertImage(info, gpu_addr, options); +} - template - void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); +template +ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { /* Searches the images overlapping the guest range for one usable with info at gpu_addr and returns its id, or a default ImageId when the address cannot be translated (directly or with the size-aware overload) or nothing matches. Remapped images are never matched. When either side is Linear, the candidate must share address, type and pitch and pass the pitch-linear size and view-format checks; otherwise IsSubresource decides. The first match stops the search. */ + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); + if (!cpu_addr) { + return ImageId{}; + } + } + const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool native_bgr = runtime.HasNativeBgr(); + ImageId image_id; + const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (True(existing_image.flags & ImageFlagBits::Remapped)) { + return false; + } + if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { + const bool strict_size = False(options & RelaxedOptions::Size) && + True(existing_image.flags & ImageFlagBits::Strong); + const ImageInfo& existing = existing_image.info; + if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && + existing.pitch == info.pitch && + IsPitchLinearSameSize(existing, info, strict_size) && + IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { + image_id = existing_image_id; + return true; + } + } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, + native_bgr)) { + image_id = existing_image_id; + return true; + } + return false; + }; + ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); + return image_id; +} - /// Iterates over all the images in a region calling func - template - void ForEachSparseSegment(ImageBase& image, Func&& func); +template +ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { /* Creates a new image through JoinImages and records its id in the allocation entry keyed by the image's final gpu_addr, creating that entry when absent. When gpu_addr has no CPU translation, even with the size-aware lookup, a fake CPU address is synthesized from virtual_invalid_space, which then advances by the image size aligned up to 32. NOTE(review): every path above the assert leaves cpu_addr engaged, so the ASSERT_MSG appears unable to fire. The options parameter is not used in this body. */ + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + const auto size = CalculateGuestSizeInBytes(info); + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); + if (!cpu_addr) { + const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; + virtual_invalid_space += Common::AlignUp(size, 32); + cpu_addr = std::optional(fake_addr); + } + } + ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); + const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); + const Image& image = slot_images[image_id]; + // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different + const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); + if (is_new) { + it->second = slot_image_allocs.insert(); + } + slot_image_allocs[it->second].images.push_back(image_id); + return image_id; +} - /// Find or create an image view in the given image with the passed parameters - [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); +template +ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { /* Creates the new image for the requested range and reconciles it with every existing overlap. Overlaps that ResolveOverlap can merge widen the new image (address and resources are taken from the solution) and are later copied into it and deleted. Overlaps where one image is a sub-resource of the other become aliases. Remaining overlaps are linked as bad overlaps. Remapped overlaps, and images from the sparse table with the same address and size that were not seen in the CPU walk, are deleted outright. For a Linear request only same-pitch, same-address overlaps are aliased and nothing else is considered. gpu_addr and cpu_addr are by-value parameters that the overlap callback may rewrite. */ + ImageInfo new_info = info; + const size_t size_bytes = CalculateGuestSizeInBytes(new_info); + const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool native_bgr = runtime.HasNativeBgr(); + std::vector overlap_ids; + std::unordered_set overlaps_found; + std::vector left_aliased_ids; + std::vector right_aliased_ids; + std::unordered_set ignore_textures; + std::vector bad_overlap_ids; + const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + return; + } + if (info.type == ImageType::Linear) { + if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { + // Alias linear images with the same pitch + left_aliased_ids.push_back(overlap_id); + } + return; + } + overlaps_found.insert(overlap_id); + static constexpr bool strict_size = true; + const std::optional solution = ResolveOverlap( + new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); + if (solution) { + gpu_addr = solution->gpu_addr; + cpu_addr = solution->cpu_addr; + new_info.resources = solution->resources; + overlap_ids.push_back(overlap_id); + return; + } + static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; + const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); + if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { + left_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, + broken_views, native_bgr)) { + right_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else { + bad_overlap_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::BadOverlap; + } + }; + ForEachImageInRegion(cpu_addr, size_bytes, region_check); + const auto region_check_gpu = [&](ImageId 
overlap_id, ImageBase& overlap) { + if (!overlaps_found.contains(overlap_id)) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + } + if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { + ignore_textures.insert(overlap_id); + } + } + }; + ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); + const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); + Image& new_image = slot_images[new_image_id]; - /// Register image in the page table - void RegisterImage(ImageId image); + if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { + new_image.flags |= ImageFlagBits::Sparse; + } - /// Unregister image from the page table - void UnregisterImage(ImageId image); + for (const ImageId overlap_id : ignore_textures) { + Image& overlap = slot_images[overlap_id]; + if (True(overlap.flags & ImageFlagBits::GpuModified)) { + UNIMPLEMENTED(); + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } - /// Track CPU reads and writes for image - void TrackImage(ImageBase& image, ImageId image_id); + // TODO: Only upload what we need + RefreshContents(new_image, new_image_id); + + for (const ImageId overlap_id : overlap_ids) { + Image& overlap = slot_images[overlap_id]; + if (overlap.info.num_samples != new_image.info.num_samples) { + LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); + } else { + const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); + const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); + runtime.CopyImage(new_image, overlap, copies); + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + ImageBase& new_image_base = new_image; + for 
(const ImageId aliased_id : right_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : left_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : bad_overlap_ids) { + ImageBase& aliased = slot_images[aliased_id]; + aliased.overlapping_images.push_back(new_image_id); + new_image.overlapping_images.push_back(aliased_id); + new_image.flags |= ImageFlagBits::BadOverlap; + } + RegisterImage(new_image_id); + return new_image_id; +} - /// Stop tracking CPU reads and writes for image - void UntrackImage(ImageBase& image, ImageId image_id); +template +typename TextureCache

::BlitImages TextureCache

::GetBlitImages( + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { /* Finds or creates the destination and source images of a blit and returns their ids together with the formats left in dst_info and src_info after DeduceBlitImages. Lookups relax format and sample matching. The whole search is repeated for as long as has_deleted_images is set during an iteration; the flag is reset at the top of each pass. When the two format types differ after deduction, the insertions are skipped for that pass, so either id may be left invalid. */ + static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; + const GPUVAddr dst_addr = dst.Address(); + const GPUVAddr src_addr = src.Address(); + ImageInfo dst_info(dst); + ImageInfo src_info(src); + ImageId dst_id; + ImageId src_id; + do { + has_deleted_images = false; + dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); + src_id = FindImage(src_info, src_addr, FIND_OPTIONS); + const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; + const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; + DeduceBlitImages(dst_info, src_info, dst_image, src_image); + if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { + continue; + } + if (!dst_id) { + dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); + } + if (!src_id) { + src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); + } + } while (has_deleted_images); + return BlitImages{ + .dst_id = dst_id, + .src_id = src_id, + .dst_format = dst_info.format, + .src_format = src_info.format, + }; +} - /// Delete image from the cache - void DeleteImage(ImageId image); +template +SamplerId TextureCache

::FindSampler(const TSCEntry& config) { /* Returns the cached sampler id for a TSC entry, creating the sampler the first time the entry is seen. A descriptor whose raw words are all zero yields NULL_SAMPLER_ID and is not cached. */ + if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { + return NULL_SAMPLER_ID; + } + const auto [pair, is_new] = samplers.try_emplace(config); + if (is_new) { + pair->second = slot_samplers.insert(runtime, config); + } + return pair->second; +} - /// Remove image views references from the cache - void RemoveImageViewReferences(std::span removed_views); +template +ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { /* Returns a view for color render target `index` built from the Maxwell3D registers. Yields a default ImageViewId when index is not below rt_control.count, the target address is zero, or its format is NONE. */ + const auto& regs = maxwell3d.regs; + if (index >= regs.rt_control.count) { + return ImageViewId{}; + } + const auto& rt = regs.rt[index]; + const GPUVAddr gpu_addr = rt.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + if (rt.format == Tegra::RenderTargetFormat::NONE) { + return ImageViewId{}; + } + const ImageInfo info(regs, index); + return FindRenderTargetView(info, gpu_addr, is_clear); +} - /// Remove framebuffers using the given image views from the cache - void RemoveFramebuffers(std::span removed_views); +template +ImageViewId TextureCache

::FindDepthBuffer(bool is_clear) { /* Returns a view for the depth (zeta) buffer built from the Maxwell3D registers. Yields a default ImageViewId when zeta_enable is false or the zeta address is zero. */ + const auto& regs = maxwell3d.regs; + if (!regs.zeta_enable) { + return ImageViewId{}; + } + const GPUVAddr gpu_addr = regs.zeta.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + const ImageInfo info(regs); + return FindRenderTargetView(info, gpu_addr, is_clear); +} - /// Mark an image as modified from the GPU - void MarkModification(ImageBase& image) noexcept; +template +ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear) { /* Finds or inserts the image backing a render target and returns a one-level view on it, or NULL_IMAGE_VIEW_ID when no image is available. Sample matching is relaxed only when is_clear is true. Linear images always use level 0 and layer 0; other images locate the base from gpu_addr. The layer count is info.size.depth when the found image is 3D and info.resources.layers otherwise. */ + const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; + const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + Image& image = slot_images[image_id]; + const ImageViewType view_type = RenderTargetImageViewType(info); + SubresourceBase base; + if (image.info.type == ImageType::Linear) { + base = SubresourceBase{.level = 0, .layer = 0}; + } else { + base = image.TryFindBase(gpu_addr).value(); + } + const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; + const SubresourceRange range{ + .base = base, + .extent = {.levels = 1, .layers = layers}, + }; + return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); +} - /// Synchronize image aliases, copying data if needed - void SynchronizeAliases(ImageId image_id); +template +template +void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { /* Calls func(image_id, image) at most once per image that has a map view overlapping the given CPU range, walking page_table page by page. When func returns a bool, a true result is returned from the per-page callback to ForEachCPUPage (presumably ending the walk, confirm against ForEachCPUPage). The `picked` mark on map views and the Picked flag on images prevent duplicate visits across pages; both are cleared before returning. func is captured by copy in the per-page callback. */ + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + boost::container::small_vector maps; + ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { + const auto it = page_table.find(page); + if (it == page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageMapId map_id : it->second) { + ImageMapView& map = slot_map_views[map_id]; + if (map.picked) { + continue; + } + if (!map.Overlaps(cpu_addr, size)) { + continue; + } + map.picked = true; + maps.push_back(map_id); + Image& image = slot_images[map.image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(map.image_id); + if constexpr (BOOL_BREAK) { + if (func(map.image_id, image)) { + return true; + } + } else { + func(map.image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } + for (const ImageMapId map_id : maps) { + slot_map_views[map_id].picked = false; + } +} - /// Prepare an image to be used - void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); +template +template +void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { /* Calls func(image_id, image) at most once per image listed in gpu_page_table for the pages of the given GPU range and for which OverlapsGPU holds. When func returns a bool, a true result is returned from the per-page callback to ForEachGPUPage (presumably ending the walk, confirm against ForEachGPUPage). The Picked flag prevents duplicate visits and is cleared on every visited image before returning. */ + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = gpu_page_table.find(page); + if (it == gpu_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} - /// Prepare an image view to be used - void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); +template +template +void TextureCache

::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { /* Same walk as the GPU-region iteration but over sparse_page_table: calls func(image_id, image) at most once per listed image for which OverlapsGPU holds on the given GPU range. A true result from a bool-returning func is returned from the per-page callback to ForEachGPUPage (presumably ending the walk, confirm). Picked flags set during the walk are cleared before returning. */ + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = sparse_page_table.find(page); + if (it == sparse_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} - /// Execute copies from one image to the other, even if they are incompatible - void CopyImage(ImageId dst_id, ImageId src_id, std::span copies); +template +template +void TextureCache

::ForEachSparseSegment(ImageBase& image, Func&& func) { /* Calls func(gpu_addr, cpu_addr, size) for each (address, size) pair that GetSubmappedRange returns for the image's guest range, translating each segment's GPU address to a CPU address first (asserted to succeed). When func returns a bool, a true result stops the loop. */ + using FuncReturn = typename std::invoke_result::type; + static constexpr bool RETURNS_BOOL = std::is_same_v; + const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); + for (auto& segment : segments) { + const auto gpu_addr = segment.first; + const auto size = segment.second; + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + if constexpr (RETURNS_BOOL) { + if (func(gpu_addr, *cpu_addr, size)) { + break; + } + } else { + func(gpu_addr, *cpu_addr, size); + } + } +} - /// Bind an image view as render target, downloading resources preemtively if needed - void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); +template +ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { /* Returns the id of the image's existing view matching info; when none exists, creates a view in slot_image_views, records it on the image, and returns the new id. */ + Image& image = slot_images[image_id]; + if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { + return image_view_id; + } + const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); + image.InsertView(info, image_view_id); + return image_view_id; +} - /// Create a render target from a given image and image view parameters - [[nodiscard]] std::pair RenderTargetFromImage( - ImageId, const ImageViewInfo& view_info); +template +void TextureCache

::RegisterImage(ImageId image_id) { /* Marks the image Registered (asserting it was not already), adds its estimated footprint aligned up to 1024 to total_used_memory, and indexes it. The footprint is the larger of the guest and unswizzled sizes, replaced by EstimatedDecompressedSize for accelerated-upload ASTC or Converted images. Every image is added to gpu_page_table. A non-sparse image gets one map view stored in page_table and in image.map_view_id. A sparse image gets one map view per sparse segment, stored in sparse_views, and is also added to sparse_page_table. */ + ImageBase& image = slot_images[image_id]; + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), + "Trying to register an already registered image"); + image.flags |= ImageFlagBits::Registered; + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory += Common::AlignUp(tentative_size, 1024); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + auto map_id = + slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + image.map_view_id = map_id; + return; + } + std::vector sparse_maps{}; + ForEachSparseSegment( + image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); + ForEachCPUPage(cpu_addr, size, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + sparse_maps.push_back(map_id); + }); + sparse_views.emplace(image_id, std::move(sparse_maps)); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); +} - /// Returns true if the current clear parameters clear the whole image of a given image view - [[nodiscard]] bool IsFullClear(ImageViewId id); +template +void TextureCache

::UnregisterImage(ImageId image_id) { /* Reverses the registration of an image: clears the Registered and BadOverlap flags, subtracts the same 1024-aligned size estimate from total_used_memory, and removes the image from gpu_page_table. A non-sparse image also has its single map view removed from page_table and erased. A sparse image is removed from sparse_page_table, every map entry belonging to it is erased from the pages its segments cover, its map views are erased, and its sparse_views entry is dropped. NOTE(review): the assert below requires the image to BE registered, yet its message says "already registered"; it presumably meant "unregistered". */ + Image& image = slot_images[image_id]; + ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), + "Trying to unregister an already registered image"); + image.flags &= ~ImageFlagBits::Registered; + image.flags &= ~ImageFlagBits::BadOverlap; + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); + const auto& clear_page_table = + [this, image_id]( + u64 page, + std::unordered_map, IdentityHash>& selected_page_table) { + const auto page_it = selected_page_table.find(page); + if (page_it == selected_page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_ids.erase(vector_it); + }; + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + const auto map_id = image.map_view_id; + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_map_ids = page_it->second; + const auto vector_it = std::ranges::find(image_map_ids, map_id); + if (vector_it == image_map_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", 
+ page << PAGE_BITS); + return; + } + image_map_ids.erase(vector_it); + }); + slot_map_views.erase(map_id); + return; + } + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { + clear_page_table(page, sparse_page_table); + }); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map_range = slot_map_views[map_view_id]; + const VAddr cpu_addr = map_range.cpu_addr; + const std::size_t size = map_range.size; + ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_map_ids = page_it->second; + auto vector_it = image_map_ids.begin(); + while (vector_it != image_map_ids.end()) { + ImageMapView& map = slot_map_views[*vector_it]; + if (map.image_id != image_id) { + vector_it++; + continue; + } + if (!map.picked) { + map.picked = true; + } + vector_it = image_map_ids.erase(vector_it); + } + }); + slot_map_views.erase(map_view_id); + } + sparse_views.erase(it); +} - Runtime& runtime; - VideoCore::RasterizerInterface& rasterizer; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; +template +void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { /* Sets the Tracked flag (asserting it was clear) and calls rasterizer.UpdatePagesCachedCount with a delta of +1 over the image's CPU range or ranges. A non-sparse image uses its single cpu_addr range. A registered sparse image uses the map views stored in sparse_views. An unregistered sparse image enumerates its segments via ForEachSparseSegment. */ + ASSERT(False(image.flags & ImageFlagBits::Tracked)); + image.flags |= ImageFlagBits::Tracked; + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + return; + } + if (True(image.flags & ImageFlagBits::Registered)) { + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + } + return; + } + ForEachSparseSegment(image, + [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + }); +} - DescriptorTable graphics_image_table{gpu_memory}; - DescriptorTable graphics_sampler_table{gpu_memory}; - std::vector graphics_sampler_ids; - std::vector graphics_image_view_ids; +template +void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { /* Clears the Tracked flag (asserting it was set) and calls rasterizer.UpdatePagesCachedCount with a delta of -1 over the image's CPU range, or, for a sparse image, over each map view stored in sparse_views. A sparse image must still be registered when this is called, because the ranges are read from its registration data. */ + ASSERT(True(image.flags & ImageFlagBits::Tracked)); + image.flags &= ~ImageFlagBits::Tracked; + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + return; + } + ASSERT(True(image.flags & ImageFlagBits::Registered)); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + } +} - DescriptorTable compute_image_table{gpu_memory}; - DescriptorTable compute_sampler_table{gpu_memory}; - std::vector compute_sampler_ids; - std::vector compute_image_view_ids; +template +void TextureCache

::DeleteImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + const GPUVAddr gpu_addr = image.gpu_addr; + const auto alloc_it = image_allocs_table.find(gpu_addr); + if (alloc_it == image_allocs_table.end()) { + UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", + gpu_addr); + return; + } + const ImageAllocId alloc_id = alloc_it->second; + std::vector& alloc_images = slot_image_allocs[alloc_id].images; + const auto alloc_image_it = std::ranges::find(alloc_images, image_id); + if (alloc_image_it == alloc_images.end()) { + UNREACHABLE_MSG("Trying to delete an image that does not exist"); + return; + } + ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); + + // Mark render targets as dirty + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + const std::span image_view_ids = image.image_view_ids; + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; + } + } + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); + + for (const AliasedImage& alias : image.aliased_images) { + ImageBase& other_image = slot_images[alias.id]; + [[maybe_unused]] const size_t num_removed_aliases = + std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { + return other_alias.id == image_id; + }); + other_image.CheckAliasState(); + ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", + num_removed_aliases); + } + for (const ImageId overlap_id : image.overlapping_images) { + ImageBase& 
other_image = slot_images[overlap_id]; + [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( + other_image.overlapping_images, + [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); + other_image.CheckBadOverlapState(); + ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", + num_removed_overlaps); + } + for (const ImageViewId image_view_id : image_view_ids) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + slot_image_views.erase(image_view_id); + } + sentenced_images.Push(std::move(slot_images[image_id])); + slot_images.erase(image_id); - RenderTargets render_targets; + alloc_images.erase(alloc_image_it); + if (alloc_images.empty()) { + image_allocs_table.erase(alloc_it); + } + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + } + graphics_image_table.Invalidate(); + compute_image_table.Invalidate(); + has_deleted_images = true; +} - std::unordered_map image_views; - std::unordered_map samplers; - std::unordered_map framebuffers; +template +void TextureCache

::RemoveImageViewReferences(std::span removed_views) { + auto it = image_views.begin(); + while (it != image_views.end()) { + const auto found = std::ranges::find(removed_views, it->second); + if (found != removed_views.end()) { + it = image_views.erase(it); + } else { + ++it; + } + } +} - std::unordered_map, IdentityHash> page_table; - std::unordered_map, IdentityHash> gpu_page_table; - std::unordered_map, IdentityHash> sparse_page_table; +template +void TextureCache

::RemoveFramebuffers(std::span removed_views) { + auto it = framebuffers.begin(); + while (it != framebuffers.end()) { + if (it->first.Contains(removed_views)) { + it = framebuffers.erase(it); + } else { + ++it; + } + } +} - std::unordered_map> sparse_views; +template +void TextureCache

::MarkModification(ImageBase& image) noexcept { + image.flags |= ImageFlagBits::GpuModified; + image.modification_tick = ++modification_tick; +} - VAddr virtual_invalid_space{}; +template +void TextureCache

::SynchronizeAliases(ImageId image_id) { + boost::container::small_vector aliased_images; + ImageBase& image = slot_images[image_id]; + u64 most_recent_tick = image.modification_tick; + for (const AliasedImage& aliased : image.aliased_images) { + ImageBase& aliased_image = slot_images[aliased.id]; + if (image.modification_tick < aliased_image.modification_tick) { + most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); + aliased_images.push_back(&aliased); + } + } + if (aliased_images.empty()) { + return; + } + image.modification_tick = most_recent_tick; + std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { + const ImageBase& lhs_image = slot_images[lhs->id]; + const ImageBase& rhs_image = slot_images[rhs->id]; + return lhs_image.modification_tick < rhs_image.modification_tick; + }); + for (const AliasedImage* const aliased : aliased_images) { + CopyImage(image_id, aliased->id, aliased->copies); + } +} - bool has_deleted_images = false; - u64 total_used_memory = 0; - u64 minimum_memory; - u64 expected_memory; - u64 critical_memory; +template +void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { + Image& image = slot_images[image_id]; + if (invalidate) { + image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); + if (False(image.flags & ImageFlagBits::Tracked)) { + TrackImage(image, image_id); + } + } else { + RefreshContents(image, image_id); + SynchronizeAliases(image_id); + } + if (is_modification) { + MarkModification(image); + } + image.frame_tick = frame_tick; +} - SlotVector slot_images; - SlotVector slot_map_views; - SlotVector slot_image_views; - SlotVector slot_image_allocs; - SlotVector slot_samplers; - SlotVector slot_framebuffers; +template +void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modification, + bool invalidate) { + if (!image_view_id) { + return; + } + const ImageViewBase& image_view = slot_image_views[image_view_id]; + if (image_view.IsBuffer()) { + return; + } + PrepareImage(image_view.image_id, is_modification, invalidate); +} - // TODO: This data structure is not optimal and it should be reworked - std::vector uncommitted_downloads; - std::queue> committed_downloads; +template +void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::span copies) { + Image& dst = slot_images[dst_id]; + Image& src = slot_images[src_id]; + const auto dst_format_type = GetFormatType(dst.info.format); + const auto src_format_type = GetFormatType(src.info.format); + if (src_format_type == dst_format_type) { + if constexpr (HAS_EMULATED_COPIES) { + if (!runtime.CanImageBeCopied(dst, src)) { + return runtime.EmulateCopyImage(dst, src, copies); + } + } + return runtime.CopyImage(dst, src, copies); + } + UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); + UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); + for (const ImageCopy& copy : copies) { + UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); + UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); + + const SubresourceBase dst_base{ + .level = copy.dst_subresource.base_level, + .layer = copy.dst_subresource.base_layer, + }; + const SubresourceBase src_base{ + .level = copy.src_subresource.base_level, + .layer = copy.src_subresource.base_layer, + }; + const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; + const SubresourceExtent src_extent{.levels = 1, .layers = 1}; + const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; + const SubresourceRange src_range{.base = src_base, .extent = src_extent}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); + const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + [[maybe_unused]] const Extent3D expected_size{ + .width = 
std::min(dst_view.size.width, src_view.size.width), + .height = std::min(dst_view.size.height, src_view.size.height), + .depth = std::min(dst_view.size.depth, src_view.size.depth), + }; + UNIMPLEMENTED_IF(copy.extent != expected_size); + + runtime.ConvertImage(dst_framebuffer, dst_view, src_view); + } +} - static constexpr size_t TICKS_TO_DESTROY = 6; - DelayedDestructionRing sentenced_images; - DelayedDestructionRing sentenced_image_view; - DelayedDestructionRing sentenced_framebuffers; +template +void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { + if (*old_id == new_id) { + return; + } + if (*old_id) { + const ImageViewBase& old_view = slot_image_views[*old_id]; + if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { + uncommitted_downloads.push_back(old_view.image_id); + } + } + *old_id = new_id; +} - std::unordered_map image_allocs_table; +template +std::pair TextureCache

::RenderTargetFromImage( + ImageId image_id, const ImageViewInfo& view_info) { + const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); + const ImageBase& image = slot_images[image_id]; + const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; + const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; + const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; + const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + const u32 num_samples = image.info.num_samples; + const auto [samples_x, samples_y] = SamplesLog2(num_samples); + const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ + .color_buffer_ids = {color_view_id}, + .depth_buffer_id = depth_view_id, + .size = {extent.width >> samples_x, extent.height >> samples_y}, + }); + return {framebuffer_id, view_id}; +} - u64 modification_tick = 0; - u64 frame_tick = 0; - typename SlotVector::Iterator deletion_iterator; -}; +template +bool TextureCache

::IsFullClear(ImageViewId id) { + if (!id) { + return true; + } + const ImageViewBase& image_view = slot_image_views[id]; + const ImageBase& image = slot_images[image_view.image_id]; + const Extent3D size = image_view.size; + const auto& regs = maxwell3d.regs; + const auto& scissor = regs.scissor_test[0]; + if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { + // Images with multiple resources can't be cleared in a single call + return false; + } + if (regs.clear_flags.scissor == 0) { + // If scissor testing is disabled, the clear is always full + return true; + } + // Make sure the clear covers all texels in the subresource + return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && + scissor.max_y >= size.height; +} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h new file mode 100644 index 000000000..a4f6e9422 --- /dev/null +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -0,0 +1,402 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "common/alignment.h" +#include "common/common_types.h" +#include "common/literals.h" +#include "common/logging/log.h" +#include "common/settings.h" +#include "video_core/compatible_formats.h" +#include "video_core/delayed_destruction_ring.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/descriptor_table.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/render_targets.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/slot_vector.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCore::Surface::GetFormatType; +using VideoCore::Surface::IsCopyCompatible; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::SurfaceType; +using namespace Common::Literals; + +template +class TextureCache { + /// Address shift for caching images into a hash table + static constexpr u64 PAGE_BITS = 20; + + /// Enables 
debugging features to the texture cache + static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; + /// Implement blits as copies between framebuffers + static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; + /// True when some copies have to be emulated + static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; + /// True when the API can provide info about the memory of the device. + static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; + + /// Image view ID for null descriptors + static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; + /// Sampler ID for bugged sampler ids + static constexpr SamplerId NULL_SAMPLER_ID{0}; + + static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; + static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; + + using Runtime = typename P::Runtime; + using Image = typename P::Image; + using ImageAlloc = typename P::ImageAlloc; + using ImageView = typename P::ImageView; + using Sampler = typename P::Sampler; + using Framebuffer = typename P::Framebuffer; + + struct BlitImages { + ImageId dst_id; + ImageId src_id; + PixelFormat dst_format; + PixelFormat src_format; + }; + + template + struct IdentityHash { + [[nodiscard]] size_t operator()(T value) const noexcept { + return static_cast(value); + } + }; + +public: + explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, + Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); + + /// Notify the cache that a new frame has been queued + void TickFrame(); + + /// Return a constant reference to the given image view id + [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; + + /// Return a reference to the given image view id + [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + + /// Mark an image as modified from the GPU + void MarkModification(ImageId id) noexcept; + + /// Fill image_view_ids with the graphics images in indices + void FillGraphicsImageViews(std::span 
indices, + std::span image_view_ids); + + /// Fill image_view_ids with the compute images in indices + void FillComputeImageViews(std::span indices, std::span image_view_ids); + + /// Get the sampler from the graphics descriptor table in the specified index + Sampler* GetGraphicsSampler(u32 index); + + /// Get the sampler from the compute descriptor table in the specified index + Sampler* GetComputeSampler(u32 index); + + /// Refresh the state for graphics image view and sampler descriptors + void SynchronizeGraphicsDescriptors(); + + /// Refresh the state for compute image view and sampler descriptors + void SynchronizeComputeDescriptors(); + + /// Update bound render targets and upload memory if necessary + /// @param is_clear True when the render targets are being used for clears + void UpdateRenderTargets(bool is_clear); + + /// Find a framebuffer with the currently bound render targets + /// UpdateRenderTargets should be called before this + Framebuffer* GetFramebuffer(); + + /// Mark images in a range as modified from the CPU + void WriteMemory(VAddr cpu_addr, size_t size); + + /// Download contents of host images to guest memory in a region + void DownloadMemory(VAddr cpu_addr, size_t size); + + /// Remove images in a region + void UnmapMemory(VAddr cpu_addr, size_t size); + + /// Remove images in a region + void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); + + /// Blit an image with the given parameters + void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy); + + /// Try to find a cached image view in the given CPU address + [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); + + /// Return true when there are uncommitted images to be downloaded + [[nodiscard]] bool HasUncommittedFlushes() const noexcept; + + /// Return true when the caller should wait for async downloads + [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; + + 
/// Commit asynchronous downloads + void CommitAsyncFlushes(); + + /// Pop asynchronous downloads + void PopAsyncFlushes(); + + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + + std::mutex mutex; + +private: + /// Iterate over all page indices in a range + template + static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + template + static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + /// Runs the Garbage Collector. 
+ void RunGarbageCollector(); + + /// Fills image_view_ids in the image views in indices + void FillImageViews(DescriptorTable& table, + std::span cached_image_view_ids, std::span indices, + std::span image_view_ids); + + /// Find or create an image view in the guest descriptor table + ImageViewId VisitImageView(DescriptorTable& table, + std::span cached_image_view_ids, u32 index); + + /// Find or create a framebuffer with the given render target parameters + FramebufferId GetFramebufferId(const RenderTargets& key); + + /// Refresh the contents (pixel data) of an image + void RefreshContents(Image& image, ImageId image_id); + + /// Upload data from guest to an image + template + void UploadImageContents(Image& image, StagingBuffer& staging_buffer); + + /// Find or create an image view from a guest descriptor + [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); + + /// Create a new image view from a guest descriptor + [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); + + /// Find or create an image from the given parameters + [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options = RelaxedOptions{}); + + /// Find an image from the given parameters + [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options); + + /// Create an image from the given parameters + [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options); + + /// Create a new image and join perfectly matching existing images + /// Remove joined images from the cache + [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + + /// Return a blit image pair from the given guest blit parameters + [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src); + + /// Find or create a sampler from a guest descriptor sampler + [[nodiscard]] 
SamplerId FindSampler(const TSCEntry& config); + + /// Find or create an image view for the given color buffer index + [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); + + /// Find or create an image view for the depth buffer + [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); + + /// Find or create a view for a render target with the given image parameters + [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear); + + /// Iterates over all the images in a region calling func + template + void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); + + template + void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); + + template + void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); + + /// Iterates over all the images in a region calling func + template + void ForEachSparseSegment(ImageBase& image, Func&& func); + + /// Find or create an image view in the given image with the passed parameters + [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); + + /// Register image in the page table + void RegisterImage(ImageId image); + + /// Unregister image from the page table + void UnregisterImage(ImageId image); + + /// Track CPU reads and writes for image + void TrackImage(ImageBase& image, ImageId image_id); + + /// Stop tracking CPU reads and writes for image + void UntrackImage(ImageBase& image, ImageId image_id); + + /// Delete image from the cache + void DeleteImage(ImageId image); + + /// Remove image views references from the cache + void RemoveImageViewReferences(std::span removed_views); + + /// Remove framebuffers using the given image views from the cache + void RemoveFramebuffers(std::span removed_views); + + /// Mark an image as modified from the GPU + void MarkModification(ImageBase& image) noexcept; + + /// Synchronize image aliases, copying data if needed + void 
SynchronizeAliases(ImageId image_id); + + /// Prepare an image to be used + void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); + + /// Prepare an image view to be used + void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); + + /// Execute copies from one image to the other, even if they are incompatible + void CopyImage(ImageId dst_id, ImageId src_id, std::span copies); + + /// Bind an image view as render target, downloading resources preemtively if needed + void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); + + /// Create a render target from a given image and image view parameters + [[nodiscard]] std::pair RenderTargetFromImage( + ImageId, const ImageViewInfo& view_info); + + /// Returns true if the current clear parameters clear the whole image of a given image view + [[nodiscard]] bool IsFullClear(ImageViewId id); + + Runtime& runtime; + VideoCore::RasterizerInterface& rasterizer; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + + DescriptorTable graphics_image_table{gpu_memory}; + DescriptorTable graphics_sampler_table{gpu_memory}; + std::vector graphics_sampler_ids; + std::vector graphics_image_view_ids; + + DescriptorTable compute_image_table{gpu_memory}; + DescriptorTable compute_sampler_table{gpu_memory}; + std::vector compute_sampler_ids; + std::vector compute_image_view_ids; + + RenderTargets render_targets; + + std::unordered_map image_views; + std::unordered_map samplers; + std::unordered_map framebuffers; + + std::unordered_map, IdentityHash> page_table; + std::unordered_map, IdentityHash> gpu_page_table; + std::unordered_map, IdentityHash> sparse_page_table; + + std::unordered_map> sparse_views; + + VAddr virtual_invalid_space{}; + + bool has_deleted_images = false; + u64 total_used_memory = 0; + u64 minimum_memory; + u64 expected_memory; + u64 critical_memory; + + SlotVector slot_images; + 
SlotVector slot_map_views; + SlotVector slot_image_views; + SlotVector slot_image_allocs; + SlotVector slot_samplers; + SlotVector slot_framebuffers; + + // TODO: This data structure is not optimal and it should be reworked + std::vector uncommitted_downloads; + std::queue> committed_downloads; + + static constexpr size_t TICKS_TO_DESTROY = 6; + DelayedDestructionRing sentenced_images; + DelayedDestructionRing sentenced_image_view; + DelayedDestructionRing sentenced_framebuffers; + + std::unordered_map image_allocs_table; + + u64 modification_tick = 0; + u64 frame_tick = 0; + typename SlotVector::Iterator deletion_iterator; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_templates.h b/src/video_core/texture_cache/texture_cache_templates.h deleted file mode 100644 index 8440d23d1..000000000 --- a/src/video_core/texture_cache/texture_cache_templates.h +++ /dev/null @@ -1,1507 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "video_core/texture_cache/texture_cache.h" - -namespace VideoCommon { - -using Tegra::Texture::SwizzleSource; -using Tegra::Texture::TextureType; -using Tegra::Texture::TICEntry; -using Tegra::Texture::TSCEntry; -using VideoCore::Surface::GetFormatType; -using VideoCore::Surface::IsCopyCompatible; -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -using VideoCore::Surface::SurfaceType; -using namespace Common::Literals; - -template -TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_) - : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { - // Configure null sampler - TSCEntry sampler_descriptor{}; - sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); - sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); - sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); - sampler_descriptor.cubemap_anisotropy.Assign(1); - - // Make sure the first index is reserved for the null resources - // This way the null resource becomes a compile time constant - void(slot_image_views.insert(runtime, NullImageParams{})); - void(slot_samplers.insert(runtime, sampler_descriptor)); - - deletion_iterator = slot_images.begin(); - - if constexpr (HAS_DEVICE_MEMORY_INFO) { - const auto device_memory = runtime.GetDeviceLocalMemory(); - const u64 possible_expected_memory = (device_memory * 3) / 10; - const u64 possible_critical_memory = (device_memory * 6) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); - minimum_memory = 0; - } else { - // on OGL we can be more conservatives as the driver takes care. - expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; - critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; - minimum_memory = expected_memory; - } -} - -template -void TextureCache

::RunGarbageCollector() { - const bool high_priority_mode = total_used_memory >= expected_memory; - const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; - int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_images.end()) { - deletion_iterator = slot_images.begin(); - if (deletion_iterator == slot_images.end()) { - break; - } - } - auto [image_id, image_tmp] = *deletion_iterator; - Image* image = image_tmp; // fix clang error. - const bool is_alias = True(image->flags & ImageFlagBits::Alias); - const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); - const bool must_download = image->IsSafeDownload(); - bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); - const u64 ticks_needed = - is_bad_overlap - ? ticks_to_destroy >> 4 - : ((should_care && aggressive_mode) ? 
ticks_to_destroy >> 1 : ticks_to_destroy); - should_care |= aggressive_mode; - if (should_care && image->frame_tick + ticks_needed < frame_tick) { - if (is_bad_overlap) { - const bool overlap_check = std::ranges::all_of( - image->overlapping_images, [&, image](const ImageId& overlap_id) { - auto& overlap = slot_images[overlap_id]; - return overlap.frame_tick >= image->frame_tick; - }); - if (!overlap_check) { - ++deletion_iterator; - continue; - } - } - if (!is_bad_overlap && must_download) { - const bool alias_check = std::ranges::none_of( - image->aliased_images, [&, image](const AliasedImage& alias) { - auto& alias_image = slot_images[alias.id]; - return (alias_image.frame_tick < image->frame_tick) || - (alias_image.modification_tick < image->modification_tick); - }); - - if (alias_check) { - auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image->info); - image->DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); - } - } - if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image, image_id); - } - UnregisterImage(image_id); - DeleteImage(image_id); - if (is_bad_overlap) { - ++num_iterations; - } - } - ++deletion_iterator; - } -} - -template -void TextureCache

::TickFrame() { - if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { - RunGarbageCollector(); - } - sentenced_images.Tick(); - sentenced_framebuffers.Tick(); - sentenced_image_view.Tick(); - ++frame_tick; -} - -template -const typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) const noexcept { - return slot_image_views[id]; -} - -template -typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { - return slot_image_views[id]; -} - -template -void TextureCache

::MarkModification(ImageId id) noexcept { - MarkModification(slot_images[id]); -} - -template -void TextureCache

::FillGraphicsImageViews(std::span indices, - std::span image_view_ids) { - FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); -} - -template -void TextureCache

::FillComputeImageViews(std::span indices, - std::span image_view_ids) { - FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); -} - -template -typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { - if (index > graphics_sampler_table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; - } - const auto [descriptor, is_new] = graphics_sampler_table.Read(index); - SamplerId& id = graphics_sampler_ids[index]; - if (is_new) { - id = FindSampler(descriptor); - } - return &slot_samplers[id]; -} - -template -typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { - if (index > compute_sampler_table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; - } - const auto [descriptor, is_new] = compute_sampler_table.Read(index); - SamplerId& id = compute_sampler_ids[index]; - if (is_new) { - id = FindSampler(descriptor); - } - return &slot_samplers[id]; -} - -template -void TextureCache

::SynchronizeGraphicsDescriptors() { - using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; - const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; - const u32 tic_limit = maxwell3d.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; - if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { - graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); - } - if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { - graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); - } -} - -template -void TextureCache

::SynchronizeComputeDescriptors() { - const bool linked_tsc = kepler_compute.launch_description.linked_tsc; - const u32 tic_limit = kepler_compute.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; - const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); - if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { - compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); - } - if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { - compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); - } -} - -template -void TextureCache

::UpdateRenderTargets(bool is_clear) { - using namespace VideoCommon::Dirty; - auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::RenderTargets]) { - for (size_t index = 0; index < NUM_RT; ++index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); - } - const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; - PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - return; - } - flags[Dirty::RenderTargets] = false; - - // Render target control is used on all render targets, so force look ups when this one is up - const bool force = flags[Dirty::RenderTargetControl]; - flags[Dirty::RenderTargetControl] = false; - - for (size_t index = 0; index < NUM_RT; ++index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - if (flags[Dirty::ColorBuffer0 + index] || force) { - flags[Dirty::ColorBuffer0 + index] = false; - BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); - } - PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); - } - if (flags[Dirty::ZetaBuffer] || force) { - flags[Dirty::ZetaBuffer] = false; - BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); - } - const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; - PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - - for (size_t index = 0; index < NUM_RT; ++index) { - render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); - } - render_targets.size = Extent2D{ - maxwell3d.regs.render_area.width, - maxwell3d.regs.render_area.height, - }; -} - -template -typename P::Framebuffer* TextureCache

::GetFramebuffer() { - return &slot_framebuffers[GetFramebufferId(render_targets)]; -} - -template -void TextureCache

::FillImageViews(DescriptorTable& table, - std::span cached_image_view_ids, - std::span indices, - std::span image_view_ids) { - ASSERT(indices.size() <= image_view_ids.size()); - do { - has_deleted_images = false; - std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { - return VisitImageView(table, cached_image_view_ids, index); - }); - } while (has_deleted_images); -} - -template -ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, - std::span cached_image_view_ids, - u32 index) { - if (index > table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); - return NULL_IMAGE_VIEW_ID; - } - const auto [descriptor, is_new] = table.Read(index); - ImageViewId& image_view_id = cached_image_view_ids[index]; - if (is_new) { - image_view_id = FindImageView(descriptor); - } - if (image_view_id != NULL_IMAGE_VIEW_ID) { - PrepareImageView(image_view_id, false, false); - } - return image_view_id; -} - -template -FramebufferId TextureCache

::GetFramebufferId(const RenderTargets& key) { - const auto [pair, is_new] = framebuffers.try_emplace(key); - FramebufferId& framebuffer_id = pair->second; - if (!is_new) { - return framebuffer_id; - } - std::array color_buffers; - std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), - [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); - ImageView* const depth_buffer = - key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; - framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); - return framebuffer_id; -} - -template -void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { - ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { - if (True(image.flags & ImageFlagBits::CpuModified)) { - return; - } - image.flags |= ImageFlagBits::CpuModified; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, image_id); - } - }); -} - -template -void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { - std::vector images; - ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { - if (!image.IsSafeDownload()) { - return; - } - image.flags &= ~ImageFlagBits::GpuModified; - images.push_back(image_id); - }); - if (images.empty()) { - return; - } - std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { - return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; - }); - for (const ImageId image_id : images) { - Image& image = slot_images[image_id]; - auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); - } -} - -template -void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { - std::vector deleted_images; - ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); - for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } - UnregisterImage(id); - DeleteImage(id); - } -} - -template -void TextureCache

::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { - std::vector deleted_images; - ForEachImageInRegionGPU(gpu_addr, size, - [&](ImageId id, Image&) { deleted_images.push_back(id); }); - for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Remapped)) { - continue; - } - image.flags |= ImageFlagBits::Remapped; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } - } -} - -template -void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy) { - const BlitImages images = GetBlitImages(dst, src); - const ImageId dst_id = images.dst_id; - const ImageId src_id = images.src_id; - PrepareImage(src_id, false, false); - PrepareImage(dst_id, true, false); - - ImageBase& dst_image = slot_images[dst_id]; - const ImageBase& src_image = slot_images[src_id]; - - // TODO: Deduplicate - const std::optional src_base = src_image.TryFindBase(src.Address()); - const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; - const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); - const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); - const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); - const Region2D src_region{ - Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, - Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, - }; - - const std::optional dst_base = dst_image.TryFindBase(dst.Address()); - const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; - const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); - const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); - const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); - const Region2D dst_region{ - Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, - Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, - }; - - // Always call this after src_framebuffer_id was queried, as the address might be invalidated. 
- Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; - if constexpr (FRAMEBUFFER_BLITS) { - // OpenGL blits from framebuffers, not images - Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; - runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, - copy.filter, copy.operation); - } else { - // Vulkan can blit images, but it lacks format reinterpretations - // Provide a framebuffer in case it's necessary - ImageView& dst_view = slot_image_views[dst_view_id]; - ImageView& src_view = slot_image_views[src_view_id]; - runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, - copy.operation); - } -} - -template -typename P::ImageView* TextureCache

::TryFindFramebufferImageView(VAddr cpu_addr) { - // TODO: Properly implement this - const auto it = page_table.find(cpu_addr >> PAGE_BITS); - if (it == page_table.end()) { - return nullptr; - } - const auto& image_map_ids = it->second; - for (const ImageMapId map_id : image_map_ids) { - const ImageMapView& map = slot_map_views[map_id]; - const ImageBase& image = slot_images[map.image_id]; - if (image.cpu_addr != cpu_addr) { - continue; - } - if (image.image_view_ids.empty()) { - continue; - } - return &slot_image_views[image.image_view_ids.at(0)]; - } - return nullptr; -} - -template -bool TextureCache

::HasUncommittedFlushes() const noexcept { - return !uncommitted_downloads.empty(); -} - -template -bool TextureCache

::ShouldWaitAsyncFlushes() const noexcept { - return !committed_downloads.empty() && !committed_downloads.front().empty(); -} - -template -void TextureCache

::CommitAsyncFlushes() { - // This is intentionally passing the value by copy - committed_downloads.push(uncommitted_downloads); - uncommitted_downloads.clear(); -} - -template -void TextureCache

::PopAsyncFlushes() { - if (committed_downloads.empty()) { - return; - } - const std::span download_ids = committed_downloads.front(); - if (download_ids.empty()) { - committed_downloads.pop(); - return; - } - size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; - } - auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); - const size_t original_offset = download_map.offset; - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, copies); - download_map.offset += image.unswizzled_size_bytes; - } - // Wait for downloads to finish - runtime.Finish(); - - download_map.offset = original_offset; - std::span download_span = download_map.mapped_span; - for (const ImageId image_id : download_ids) { - const ImageBase& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); - download_map.offset += image.unswizzled_size_bytes; - download_span = download_span.subspan(image.unswizzled_size_bytes); - } - committed_downloads.pop(); -} - -template -bool TextureCache

::IsRegionGpuModified(VAddr addr, size_t size) { - bool is_modified = false; - ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { - if (False(image.flags & ImageFlagBits::GpuModified)) { - return false; - } - is_modified = true; - return true; - }); - return is_modified; -} - -template -void TextureCache

::RefreshContents(Image& image, ImageId image_id) { - if (False(image.flags & ImageFlagBits::CpuModified)) { - // Only upload modified images - return; - } - image.flags &= ~ImageFlagBits::CpuModified; - TrackImage(image, image_id); - - if (image.info.num_samples > 1) { - LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); - return; - } - auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); - UploadImageContents(image, staging); - runtime.InsertUploadMemoryBarrier(); -} - -template -template -void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) { - const std::span mapped_span = staging.mapped_span; - const GPUVAddr gpu_addr = image.gpu_addr; - - if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { - gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); - const auto uploads = FullUploadSwizzles(image.info); - runtime.AccelerateImageUpload(image, staging, uploads); - } else if (True(image.flags & ImageFlagBits::Converted)) { - std::vector unswizzled_data(image.unswizzled_size_bytes); - auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); - ConvertImage(unswizzled_data, image.info, mapped_span, copies); - image.UploadMemory(staging, copies); - } else { - const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); - image.UploadMemory(staging, copies); - } -} - -template -ImageViewId TextureCache

::FindImageView(const TICEntry& config) { - if (!IsValidEntry(gpu_memory, config)) { - return NULL_IMAGE_VIEW_ID; - } - const auto [pair, is_new] = image_views.try_emplace(config); - ImageViewId& image_view_id = pair->second; - if (is_new) { - image_view_id = CreateImageView(config); - } - return image_view_id; -} - -template -ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { - const ImageInfo info(config); - if (info.type == ImageType::Buffer) { - const ImageViewInfo view_info(config, 0); - return slot_image_views.insert(runtime, info, view_info, config.Address()); - } - const u32 layer_offset = config.BaseLayer() * info.layer_stride; - const GPUVAddr image_gpu_addr = config.Address() - layer_offset; - const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); - if (!image_id) { - return NULL_IMAGE_VIEW_ID; - } - ImageBase& image = slot_images[image_id]; - const SubresourceBase base = image.TryFindBase(config.Address()).value(); - ASSERT(base.level == 0); - const ImageViewInfo view_info(config, base.layer); - const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); - ImageViewBase& image_view = slot_image_views[image_view_id]; - image_view.flags |= ImageViewFlagBits::Strong; - image.flags |= ImageFlagBits::Strong; - return image_view_id; -} - -template -ImageId TextureCache

::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { - return image_id; - } - return InsertImage(info, gpu_addr, options); -} - -template -ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); - if (!cpu_addr) { - return ImageId{}; - } - } - const bool broken_views = runtime.HasBrokenTextureViewFormats(); - const bool native_bgr = runtime.HasNativeBgr(); - ImageId image_id; - const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { - if (True(existing_image.flags & ImageFlagBits::Remapped)) { - return false; - } - if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { - const bool strict_size = False(options & RelaxedOptions::Size) && - True(existing_image.flags & ImageFlagBits::Strong); - const ImageInfo& existing = existing_image.info; - if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && - existing.pitch == info.pitch && - IsPitchLinearSameSize(existing, info, strict_size) && - IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { - image_id = existing_image_id; - return true; - } - } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, - native_bgr)) { - image_id = existing_image_id; - return true; - } - return false; - }; - ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); - return image_id; -} - -template -ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - const auto size = CalculateGuestSizeInBytes(info); - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); - if (!cpu_addr) { - const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; - virtual_invalid_space += Common::AlignUp(size, 32); - cpu_addr = std::optional(fake_addr); - } - } - ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); - const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); - const Image& image = slot_images[image_id]; - // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different - const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); - if (is_new) { - it->second = slot_image_allocs.insert(); - } - slot_image_allocs[it->second].images.push_back(image_id); - return image_id; -} - -template -ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { - ImageInfo new_info = info; - const size_t size_bytes = CalculateGuestSizeInBytes(new_info); - const bool broken_views = runtime.HasBrokenTextureViewFormats(); - const bool native_bgr = runtime.HasNativeBgr(); - std::vector overlap_ids; - std::unordered_set overlaps_found; - std::vector left_aliased_ids; - std::vector right_aliased_ids; - std::unordered_set ignore_textures; - std::vector bad_overlap_ids; - const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { - if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); - return; - } - if (info.type == ImageType::Linear) { - if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { - // Alias linear images with the same pitch - left_aliased_ids.push_back(overlap_id); - } - return; - } - overlaps_found.insert(overlap_id); - static constexpr bool strict_size = true; - const std::optional solution = ResolveOverlap( - new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); - if (solution) { - gpu_addr = solution->gpu_addr; - cpu_addr = solution->cpu_addr; - new_info.resources = solution->resources; - overlap_ids.push_back(overlap_id); - return; - } - static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; - const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); - if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { - left_aliased_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::Alias; - } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, - broken_views, native_bgr)) { - right_aliased_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::Alias; - } else { - bad_overlap_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::BadOverlap; - } - }; - ForEachImageInRegion(cpu_addr, size_bytes, region_check); - const auto region_check_gpu = [&](ImageId 
overlap_id, ImageBase& overlap) { - if (!overlaps_found.contains(overlap_id)) { - if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); - } - if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { - ignore_textures.insert(overlap_id); - } - } - }; - ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); - const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); - Image& new_image = slot_images[new_image_id]; - - if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { - new_image.flags |= ImageFlagBits::Sparse; - } - - for (const ImageId overlap_id : ignore_textures) { - Image& overlap = slot_images[overlap_id]; - if (True(overlap.flags & ImageFlagBits::GpuModified)) { - UNIMPLEMENTED(); - } - if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); - } - UnregisterImage(overlap_id); - DeleteImage(overlap_id); - } - - // TODO: Only upload what we need - RefreshContents(new_image, new_image_id); - - for (const ImageId overlap_id : overlap_ids) { - Image& overlap = slot_images[overlap_id]; - if (overlap.info.num_samples != new_image.info.num_samples) { - LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); - } else { - const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); - const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); - runtime.CopyImage(new_image, overlap, copies); - } - if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); - } - UnregisterImage(overlap_id); - DeleteImage(overlap_id); - } - ImageBase& new_image_base = new_image; - for (const ImageId aliased_id : right_aliased_ids) { - ImageBase& aliased = slot_images[aliased_id]; - AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); - new_image.flags |= ImageFlagBits::Alias; - } - for (const ImageId aliased_id : 
left_aliased_ids) { - ImageBase& aliased = slot_images[aliased_id]; - AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); - new_image.flags |= ImageFlagBits::Alias; - } - for (const ImageId aliased_id : bad_overlap_ids) { - ImageBase& aliased = slot_images[aliased_id]; - aliased.overlapping_images.push_back(new_image_id); - new_image.overlapping_images.push_back(aliased_id); - new_image.flags |= ImageFlagBits::BadOverlap; - } - RegisterImage(new_image_id); - return new_image_id; -} - -template -typename TextureCache

::BlitImages TextureCache

::GetBlitImages( - const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { - static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; - const GPUVAddr dst_addr = dst.Address(); - const GPUVAddr src_addr = src.Address(); - ImageInfo dst_info(dst); - ImageInfo src_info(src); - ImageId dst_id; - ImageId src_id; - do { - has_deleted_images = false; - dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); - src_id = FindImage(src_info, src_addr, FIND_OPTIONS); - const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; - const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; - DeduceBlitImages(dst_info, src_info, dst_image, src_image); - if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { - continue; - } - if (!dst_id) { - dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); - } - if (!src_id) { - src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); - } - } while (has_deleted_images); - return BlitImages{ - .dst_id = dst_id, - .src_id = src_id, - .dst_format = dst_info.format, - .src_format = src_info.format, - }; -} - -template -SamplerId TextureCache

::FindSampler(const TSCEntry& config) { - if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { - return NULL_SAMPLER_ID; - } - const auto [pair, is_new] = samplers.try_emplace(config); - if (is_new) { - pair->second = slot_samplers.insert(runtime, config); - } - return pair->second; -} - -template -ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { - const auto& regs = maxwell3d.regs; - if (index >= regs.rt_control.count) { - return ImageViewId{}; - } - const auto& rt = regs.rt[index]; - const GPUVAddr gpu_addr = rt.Address(); - if (gpu_addr == 0) { - return ImageViewId{}; - } - if (rt.format == Tegra::RenderTargetFormat::NONE) { - return ImageViewId{}; - } - const ImageInfo info(regs, index); - return FindRenderTargetView(info, gpu_addr, is_clear); -} - -template -ImageViewId TextureCache

::FindDepthBuffer(bool is_clear) { - const auto& regs = maxwell3d.regs; - if (!regs.zeta_enable) { - return ImageViewId{}; - } - const GPUVAddr gpu_addr = regs.zeta.Address(); - if (gpu_addr == 0) { - return ImageViewId{}; - } - const ImageInfo info(regs); - return FindRenderTargetView(info, gpu_addr, is_clear); -} - -template -ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear) { - const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; - const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); - if (!image_id) { - return NULL_IMAGE_VIEW_ID; - } - Image& image = slot_images[image_id]; - const ImageViewType view_type = RenderTargetImageViewType(info); - SubresourceBase base; - if (image.info.type == ImageType::Linear) { - base = SubresourceBase{.level = 0, .layer = 0}; - } else { - base = image.TryFindBase(gpu_addr).value(); - } - const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; - const SubresourceRange range{ - .base = base, - .extent = {.levels = 1, .layers = layers}, - }; - return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); -} - -template -template -void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - boost::container::small_vector maps; - ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { - const auto it = page_table.find(page); - if (it == page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageMapId map_id : it->second) { - ImageMapView& map = slot_map_views[map_id]; - if (map.picked) { - continue; - } - if (!map.Overlaps(cpu_addr, size)) { - continue; - } - map.picked = true; - maps.push_back(map_id); - Image& image = slot_images[map.image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(map.image_id); - if constexpr (BOOL_BREAK) { - if (func(map.image_id, image)) { - return true; - } - } else { - func(map.image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } - for (const ImageMapId map_id : maps) { - slot_map_views[map_id].picked = false; - } -} - -template -template -void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = gpu_page_table.find(page); - if (it == gpu_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageId image_id : it->second) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - if (!image.OverlapsGPU(gpu_addr, size)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(image_id); - if constexpr (BOOL_BREAK) { - if (func(image_id, image)) { - return true; - } - } else { - func(image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } -} - -template -template -void TextureCache

::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = sparse_page_table.find(page); - if (it == sparse_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageId image_id : it->second) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - if (!image.OverlapsGPU(gpu_addr, size)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(image_id); - if constexpr (BOOL_BREAK) { - if (func(image_id, image)) { - return true; - } - } else { - func(image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } -} - -template -template -void TextureCache

::ForEachSparseSegment(ImageBase& image, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool RETURNS_BOOL = std::is_same_v; - const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); - for (auto& segment : segments) { - const auto gpu_addr = segment.first; - const auto size = segment.second; - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - if constexpr (RETURNS_BOOL) { - if (func(gpu_addr, *cpu_addr, size)) { - break; - } - } else { - func(gpu_addr, *cpu_addr, size); - } - } -} - -template -ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { - Image& image = slot_images[image_id]; - if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { - return image_view_id; - } - const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); - image.InsertView(info, image_view_id); - return image_view_id; -} - -template -void TextureCache

::RegisterImage(ImageId image_id) { - ImageBase& image = slot_images[image_id]; - ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), - "Trying to register an already registered image"); - image.flags |= ImageFlagBits::Registered; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory += Common::AlignUp(tentative_size, 1024); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); - if (False(image.flags & ImageFlagBits::Sparse)) { - auto map_id = - slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); - ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, - [this, map_id](u64 page) { page_table[page].push_back(map_id); }); - image.map_view_id = map_id; - return; - } - std::vector sparse_maps{}; - ForEachSparseSegment( - image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); - ForEachCPUPage(cpu_addr, size, - [this, map_id](u64 page) { page_table[page].push_back(map_id); }); - sparse_maps.push_back(map_id); - }); - sparse_views.emplace(image_id, std::move(sparse_maps)); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); -} - -template -void TextureCache

::UnregisterImage(ImageId image_id) { - Image& image = slot_images[image_id]; - ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), - "Trying to unregister an already registered image"); - image.flags &= ~ImageFlagBits::Registered; - image.flags &= ~ImageFlagBits::BadOverlap; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory -= Common::AlignUp(tentative_size, 1024); - const auto& clear_page_table = - [this, image_id]( - u64 page, - std::unordered_map, IdentityHash>& selected_page_table) { - const auto page_it = selected_page_table.find(page); - if (page_it == selected_page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_ids = page_it->second; - const auto vector_it = std::ranges::find(image_ids, image_id); - if (vector_it == image_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", - page << PAGE_BITS); - return; - } - image_ids.erase(vector_it); - }; - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); - if (False(image.flags & ImageFlagBits::Sparse)) { - const auto map_id = image.map_view_id; - ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_map_ids = page_it->second; - const auto vector_it = std::ranges::find(image_map_ids, map_id); - if (vector_it == image_map_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", 
- page << PAGE_BITS); - return; - } - image_map_ids.erase(vector_it); - }); - slot_map_views.erase(map_id); - return; - } - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { - clear_page_table(page, sparse_page_table); - }); - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map_range = slot_map_views[map_view_id]; - const VAddr cpu_addr = map_range.cpu_addr; - const std::size_t size = map_range.size; - ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_map_ids = page_it->second; - auto vector_it = image_map_ids.begin(); - while (vector_it != image_map_ids.end()) { - ImageMapView& map = slot_map_views[*vector_it]; - if (map.image_id != image_id) { - vector_it++; - continue; - } - if (!map.picked) { - map.picked = true; - } - vector_it = image_map_ids.erase(vector_it); - } - }); - slot_map_views.erase(map_view_id); - } - sparse_views.erase(it); -} - -template -void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { - ASSERT(False(image.flags & ImageFlagBits::Tracked)); - image.flags |= ImageFlagBits::Tracked; - if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); - return; - } - if (True(image.flags & ImageFlagBits::Registered)) { - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; - const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - } - return; - } - ForEachSparseSegment(image, - [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - }); -} - -template -void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { - ASSERT(True(image.flags & ImageFlagBits::Tracked)); - image.flags &= ~ImageFlagBits::Tracked; - if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); - return; - } - ASSERT(True(image.flags & ImageFlagBits::Registered)); - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; - const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); - } -} - -template -void TextureCache

::DeleteImage(ImageId image_id) { - ImageBase& image = slot_images[image_id]; - const GPUVAddr gpu_addr = image.gpu_addr; - const auto alloc_it = image_allocs_table.find(gpu_addr); - if (alloc_it == image_allocs_table.end()) { - UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", - gpu_addr); - return; - } - const ImageAllocId alloc_id = alloc_it->second; - std::vector& alloc_images = slot_image_allocs[alloc_id].images; - const auto alloc_image_it = std::ranges::find(alloc_images, image_id); - if (alloc_image_it == alloc_images.end()) { - UNREACHABLE_MSG("Trying to delete an image that does not exist"); - return; - } - ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); - ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); - - // Mark render targets as dirty - auto& dirty = maxwell3d.dirty.flags; - dirty[Dirty::RenderTargets] = true; - dirty[Dirty::ZetaBuffer] = true; - for (size_t rt = 0; rt < NUM_RT; ++rt) { - dirty[Dirty::ColorBuffer0 + rt] = true; - } - const std::span image_view_ids = image.image_view_ids; - for (const ImageViewId image_view_id : image_view_ids) { - std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); - if (render_targets.depth_buffer_id == image_view_id) { - render_targets.depth_buffer_id = ImageViewId{}; - } - } - RemoveImageViewReferences(image_view_ids); - RemoveFramebuffers(image_view_ids); - - for (const AliasedImage& alias : image.aliased_images) { - ImageBase& other_image = slot_images[alias.id]; - [[maybe_unused]] const size_t num_removed_aliases = - std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { - return other_alias.id == image_id; - }); - other_image.CheckAliasState(); - ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", - num_removed_aliases); - } - for (const ImageId overlap_id : image.overlapping_images) { - ImageBase& 
other_image = slot_images[overlap_id]; - [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( - other_image.overlapping_images, - [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); - other_image.CheckBadOverlapState(); - ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", - num_removed_overlaps); - } - for (const ImageViewId image_view_id : image_view_ids) { - sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); - slot_image_views.erase(image_view_id); - } - sentenced_images.Push(std::move(slot_images[image_id])); - slot_images.erase(image_id); - - alloc_images.erase(alloc_image_it); - if (alloc_images.empty()) { - image_allocs_table.erase(alloc_it); - } - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(compute_image_view_ids, CORRUPT_ID); - } - graphics_image_table.Invalidate(); - compute_image_table.Invalidate(); - has_deleted_images = true; -} - -template -void TextureCache

::RemoveImageViewReferences(std::span removed_views) { - auto it = image_views.begin(); - while (it != image_views.end()) { - const auto found = std::ranges::find(removed_views, it->second); - if (found != removed_views.end()) { - it = image_views.erase(it); - } else { - ++it; - } - } -} - -template -void TextureCache

::RemoveFramebuffers(std::span removed_views) { - auto it = framebuffers.begin(); - while (it != framebuffers.end()) { - if (it->first.Contains(removed_views)) { - it = framebuffers.erase(it); - } else { - ++it; - } - } -} - -template -void TextureCache

::MarkModification(ImageBase& image) noexcept { - image.flags |= ImageFlagBits::GpuModified; - image.modification_tick = ++modification_tick; -} - -template -void TextureCache

::SynchronizeAliases(ImageId image_id) { - boost::container::small_vector aliased_images; - ImageBase& image = slot_images[image_id]; - u64 most_recent_tick = image.modification_tick; - for (const AliasedImage& aliased : image.aliased_images) { - ImageBase& aliased_image = slot_images[aliased.id]; - if (image.modification_tick < aliased_image.modification_tick) { - most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); - aliased_images.push_back(&aliased); - } - } - if (aliased_images.empty()) { - return; - } - image.modification_tick = most_recent_tick; - std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { - const ImageBase& lhs_image = slot_images[lhs->id]; - const ImageBase& rhs_image = slot_images[rhs->id]; - return lhs_image.modification_tick < rhs_image.modification_tick; - }); - for (const AliasedImage* const aliased : aliased_images) { - CopyImage(image_id, aliased->id, aliased->copies); - } -} - -template -void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { - Image& image = slot_images[image_id]; - if (invalidate) { - image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); - if (False(image.flags & ImageFlagBits::Tracked)) { - TrackImage(image, image_id); - } - } else { - RefreshContents(image, image_id); - SynchronizeAliases(image_id); - } - if (is_modification) { - MarkModification(image); - } - image.frame_tick = frame_tick; -} - -template -void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modification, - bool invalidate) { - if (!image_view_id) { - return; - } - const ImageViewBase& image_view = slot_image_views[image_view_id]; - if (image_view.IsBuffer()) { - return; - } - PrepareImage(image_view.image_id, is_modification, invalidate); -} - -template -void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::span copies) { - Image& dst = slot_images[dst_id]; - Image& src = slot_images[src_id]; - const auto dst_format_type = GetFormatType(dst.info.format); - const auto src_format_type = GetFormatType(src.info.format); - if (src_format_type == dst_format_type) { - if constexpr (HAS_EMULATED_COPIES) { - if (!runtime.CanImageBeCopied(dst, src)) { - return runtime.EmulateCopyImage(dst, src, copies); - } - } - return runtime.CopyImage(dst, src, copies); - } - UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); - UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); - for (const ImageCopy& copy : copies) { - UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); - UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); - UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); - UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); - - const SubresourceBase dst_base{ - .level = copy.dst_subresource.base_level, - .layer = copy.dst_subresource.base_layer, - }; - const SubresourceBase src_base{ - .level = copy.src_subresource.base_level, - .layer = copy.src_subresource.base_layer, - }; - const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; - const SubresourceExtent src_extent{.levels = 1, .layers = 1}; - const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; - const SubresourceRange src_range{.base = src_base, .extent = src_extent}; - const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); - const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); - const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); - Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; - const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); - ImageView& dst_view = slot_image_views[dst_view_id]; - ImageView& src_view = slot_image_views[src_view_id]; - [[maybe_unused]] const Extent3D expected_size{ - .width = 
std::min(dst_view.size.width, src_view.size.width), - .height = std::min(dst_view.size.height, src_view.size.height), - .depth = std::min(dst_view.size.depth, src_view.size.depth), - }; - UNIMPLEMENTED_IF(copy.extent != expected_size); - - runtime.ConvertImage(dst_framebuffer, dst_view, src_view); - } -} - -template -void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { - if (*old_id == new_id) { - return; - } - if (*old_id) { - const ImageViewBase& old_view = slot_image_views[*old_id]; - if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { - uncommitted_downloads.push_back(old_view.image_id); - } - } - *old_id = new_id; -} - -template -std::pair TextureCache

::RenderTargetFromImage( - ImageId image_id, const ImageViewInfo& view_info) { - const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); - const ImageBase& image = slot_images[image_id]; - const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; - const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; - const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; - const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); - const u32 num_samples = image.info.num_samples; - const auto [samples_x, samples_y] = SamplesLog2(num_samples); - const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ - .color_buffer_ids = {color_view_id}, - .depth_buffer_id = depth_view_id, - .size = {extent.width >> samples_x, extent.height >> samples_y}, - }); - return {framebuffer_id, view_id}; -} - -template -bool TextureCache

::IsFullClear(ImageViewId id) { - if (!id) { - return true; - } - const ImageViewBase& image_view = slot_image_views[id]; - const ImageBase& image = slot_images[image_view.image_id]; - const Extent3D size = image_view.size; - const auto& regs = maxwell3d.regs; - const auto& scissor = regs.scissor_test[0]; - if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { - // Images with multiple resources can't be cleared in a single call - return false; - } - if (regs.clear_flags.scissor == 0) { - // If scissor testing is disabled, the clear is always full - return true; - } - // Make sure the clear covers all texels in the subresource - return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && - scissor.max_y >= size.height; -} - -} // namespace VideoCommon -- cgit v1.2.3 From 02e98f6c93e4c6b360934e154f453d5b01394104 Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Thu, 5 Aug 2021 20:52:12 +0000 Subject: texture_cache: Don't change copyright year --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 2 +- src/video_core/texture_cache/texture_cache_base.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 26b423f5e..53848ca5e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1,4 +1,4 @@ -// Copyright 2021 yuzu Emulator Project +// Copyright 2019 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index b0496556d..8e029bcb3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1,4 +1,4 @@ -// Copyright 2021 yuzu Emulator Project +// Copyright 2019 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 5884fa16e..d7fe87514 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1,4 +1,4 @@ -// Copyright 2021 yuzu Emulator Project +// Copyright 2019 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index a4f6e9422..09474b823 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -1,4 +1,4 @@ -// Copyright 2021 yuzu Emulator Project +// Copyright 2019 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-- cgit v1.2.3 From e80323b8b070c42a8846a23200f9a36787f790d8 Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Sat, 7 Aug 2021 01:27:47 +0000 Subject: texture_cache: Address ameerj's review --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 1 + src/video_core/renderer_vulkan/vk_texture_cache.cpp | 2 ++ src/video_core/texture_cache/texture_cache.h | 2 ++ src/video_core/texture_cache/texture_cache_base.h | 10 ---------- 4 files changed, 5 insertions(+), 10 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 53848ca5e..b0aee6cc1 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -18,6 +18,7 @@ #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/surface.h" +#include "video_core/texture_cache/formatter.h" #include "video_core/texture_cache/samples_helper.h" namespace OpenGL { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8e029bcb3..8f4df7122 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -19,6 +19,8 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/samples_helper.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d7fe87514..40953afb7 100644 --- 
a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,6 +4,8 @@ #pragma once +#include "video_core/dirty_flags.h" +#include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache_base.h" namespace VideoCommon { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 09474b823..b72448c0d 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -4,12 +4,8 @@ #pragma once -#include #include -#include -#include #include -#include #include #include #include @@ -22,11 +18,9 @@ #include "common/alignment.h" #include "common/common_types.h" #include "common/literals.h" -#include "common/logging/log.h" #include "common/settings.h" #include "video_core/compatible_formats.h" #include "video_core/delayed_destruction_ring.h" -#include "video_core/dirty_flags.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" @@ -34,14 +28,10 @@ #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" #include "video_core/texture_cache/descriptor_table.h" -#include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/formatter.h" #include "video_core/texture_cache/image_base.h" #include "video_core/texture_cache/image_info.h" -#include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/image_view_info.h" #include "video_core/texture_cache/render_targets.h" -#include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" #include "video_core/texture_cache/util.h" -- cgit v1.2.3