aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/buffer_cache/buffer_base.h11
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h163
-rw-r--r--src/video_core/command_classes/codecs/codec.h8
-rw-r--r--src/video_core/command_classes/vic.cpp25
-rw-r--r--src/video_core/engines/maxwell_3d.h1
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp46
-rw-r--r--src/video_core/rasterizer_interface.h4
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h5
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h9
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp31
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp107
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp18
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h9
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h11
-rw-r--r--src/video_core/surface.cpp7
-rw-r--r--src/video_core/surface.h2
-rw-r--r--src/video_core/texture_cache/image_base.cpp37
-rw-r--r--src/video_core/texture_cache/image_base.h12
-rw-r--r--src/video_core/texture_cache/slot_vector.h70
-rw-r--r--src/video_core/texture_cache/texture_cache.h153
-rw-r--r--src/video_core/texture_cache/util.cpp2
-rw-r--r--src/video_core/textures/astc.cpp10
-rw-r--r--src/video_core/textures/astc.h11
-rw-r--r--src/video_core/vulkan_common/nsight_aftermath_tracker.cpp2
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp12
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h38
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.cpp22
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.h5
40 files changed, 601 insertions, 273 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f9454bbaa..e31eb30c0 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -293,6 +293,7 @@ endif()
if (MSVC)
target_compile_options(video_core PRIVATE
/we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
+ /we4244 # 'var' : conversion from integer to 'type', possible loss of data
/we4456 # Declaration of 'identifier' hides previous local declaration
/we4457 # Declaration of 'identifier' hides function parameter
/we4458 # Declaration of 'identifier' hides class member
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index a39505903..b121d36a3 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -256,6 +256,16 @@ public:
stream_score += score;
}
+ /// Sets the new frame tick
+ void SetFrameTick(u64 new_frame_tick) noexcept {
+ frame_tick = new_frame_tick;
+ }
+
+ /// Returns the new frame tick
+ [[nodiscard]] u64 FrameTick() const noexcept {
+ return frame_tick;
+ }
+
/// Returns the likeliness of this being a stream buffer
[[nodiscard]] int StreamScore() const noexcept {
return stream_score;
@@ -586,6 +596,7 @@ private:
RasterizerInterface* rasterizer = nullptr;
VAddr cpu_addr = 0;
Words words;
+ u64 frame_tick = 0;
BufferFlagBits flags{};
int stream_score = 0;
};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d371b842f..cad7f902d 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -18,6 +18,7 @@
#include "common/common_types.h"
#include "common/div_ceil.h"
+#include "common/literals.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
@@ -47,8 +48,11 @@ constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
constexpr u32 NUM_STORAGE_BUFFERS = 16;
constexpr u32 NUM_STAGES = 5;
+using namespace Common::Literals;
+
template <typename P>
class BufferCache {
+
// Page size for caching purposes.
// This is unrelated to the CPU page size and it can be changed as it seems optimal.
static constexpr u32 PAGE_BITS = 16;
@@ -65,6 +69,9 @@ class BufferCache {
static constexpr BufferId NULL_BUFFER_ID{0};
+ static constexpr u64 EXPECTED_MEMORY = 512_MiB;
+ static constexpr u64 CRITICAL_MEMORY = 1_GiB;
+
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using Runtime = typename P::Runtime;
@@ -92,7 +99,7 @@ class BufferCache {
};
public:
- static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4096;
+ static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -188,6 +195,8 @@ private:
((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
}
+ void RunGarbageCollector();
+
void BindHostIndexBuffer();
void BindHostVertexBuffers();
@@ -243,6 +252,8 @@ private:
template <bool insert>
void ChangeRegister(BufferId buffer_id);
+ void TouchBuffer(Buffer& buffer) const noexcept;
+
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
@@ -255,6 +266,10 @@ private:
void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
+ void DownloadBufferMemory(Buffer& buffer_id);
+
+ void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
+
void DeleteBuffer(BufferId buffer_id);
void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
@@ -319,6 +334,10 @@ private:
size_t immediate_buffer_capacity = 0;
std::unique_ptr<u8[]> immediate_buffer_alloc;
+ typename SlotVector<Buffer>::Iterator deletion_iterator;
+ u64 frame_tick = 0;
+ u64 total_used_memory = 0;
+
std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
};
@@ -332,6 +351,28 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
+ deletion_iterator = slot_buffers.end();
+}
+
+template <class P>
+void BufferCache<P>::RunGarbageCollector() {
+ const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
+ const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
+ int num_iterations = aggressive_gc ? 64 : 32;
+ for (; num_iterations > 0; --num_iterations) {
+ if (deletion_iterator == slot_buffers.end()) {
+ deletion_iterator = slot_buffers.begin();
+ }
+ ++deletion_iterator;
+ if (deletion_iterator == slot_buffers.end()) {
+ break;
+ }
+ const auto [buffer_id, buffer] = *deletion_iterator;
+ if (buffer->FrameTick() + ticks_to_destroy < frame_tick) {
+ DownloadBufferMemory(*buffer);
+ DeleteBuffer(buffer_id);
+ }
+ }
}
template <class P>
@@ -349,6 +390,10 @@ void BufferCache<P>::TickFrame() {
const bool skip_preferred = hits * 256 < shots * 251;
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
+ if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) {
+ RunGarbageCollector();
+ }
+ ++frame_tick;
delayed_destruction_ring.Tick();
}
@@ -372,48 +417,7 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
template <class P>
void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
- boost::container::small_vector<BufferCopy, 1> copies;
- u64 total_size_bytes = 0;
- u64 largest_copy = 0;
- buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
- copies.push_back(BufferCopy{
- .src_offset = range_offset,
- .dst_offset = total_size_bytes,
- .size = range_size,
- });
- total_size_bytes += range_size;
- largest_copy = std::max(largest_copy, range_size);
- });
- if (total_size_bytes == 0) {
- return;
- }
- MICROPROFILE_SCOPE(GPU_DownloadMemory);
-
- if constexpr (USE_MEMORY_MAPS) {
- auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
- const u8* const mapped_memory = download_staging.mapped_span.data();
- const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
- for (BufferCopy& copy : copies) {
- // Modify copies to have the staging offset in mind
- copy.dst_offset += download_staging.offset;
- }
- runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
- runtime.Finish();
- for (const BufferCopy& copy : copies) {
- const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
- // Undo the modified offset
- const u64 dst_offset = copy.dst_offset - download_staging.offset;
- const u8* copy_mapped_memory = mapped_memory + dst_offset;
- cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
- }
- } else {
- const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
- for (const BufferCopy& copy : copies) {
- buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
- const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
- cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
- }
- }
+ DownloadBufferMemory(buffer, cpu_addr, size);
});
}
@@ -640,6 +644,7 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
template <class P>
void BufferCache<P>::BindHostIndexBuffer() {
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
+ TouchBuffer(buffer);
const u32 offset = buffer.Offset(index_buffer.cpu_addr);
const u32 size = index_buffer.size;
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
@@ -658,6 +663,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
const Binding& binding = vertex_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
@@ -693,6 +699,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
const VAddr cpu_addr = binding.cpu_addr;
const u32 size = binding.size;
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
size <= uniform_buffer_skip_cache_size &&
!buffer.IsRegionGpuModified(cpu_addr, size);
@@ -744,6 +751,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
const Binding& binding = storage_buffers[stage][index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -766,6 +774,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
const Binding& binding = transform_feedback_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -784,6 +793,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
const Binding& binding = compute_uniform_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -803,6 +813,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
const Binding& binding = compute_storage_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -1101,6 +1112,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
+ TouchBuffer(slot_buffers[new_buffer_id]);
for (const BufferId overlap_id : overlap.ids) {
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
}
@@ -1122,8 +1134,14 @@ template <class P>
template <bool insert>
void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
const Buffer& buffer = slot_buffers[buffer_id];
+ const auto size = buffer.SizeBytes();
+ if (insert) {
+ total_used_memory += Common::AlignUp(size, 1024);
+ } else {
+ total_used_memory -= Common::AlignUp(size, 1024);
+ }
const VAddr cpu_addr_begin = buffer.CpuAddr();
- const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes();
+ const VAddr cpu_addr_end = cpu_addr_begin + size;
const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
for (u64 page = page_begin; page != page_end; ++page) {
@@ -1136,6 +1154,11 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
}
template <class P>
+void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept {
+ buffer.SetFrameTick(frame_tick);
+}
+
+template <class P>
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
if (buffer.CpuAddr() == 0) {
return true;
@@ -1212,6 +1235,57 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
}
template <class P>
+void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
+ DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
+}
+
+template <class P>
+void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
+ boost::container::small_vector<BufferCopy, 1> copies;
+ u64 total_size_bytes = 0;
+ u64 largest_copy = 0;
+ buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ copies.push_back(BufferCopy{
+ .src_offset = range_offset,
+ .dst_offset = total_size_bytes,
+ .size = range_size,
+ });
+ total_size_bytes += range_size;
+ largest_copy = std::max(largest_copy, range_size);
+ });
+ if (total_size_bytes == 0) {
+ return;
+ }
+ MICROPROFILE_SCOPE(GPU_DownloadMemory);
+
+ if constexpr (USE_MEMORY_MAPS) {
+ auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+ const u8* const mapped_memory = download_staging.mapped_span.data();
+ const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
+ for (BufferCopy& copy : copies) {
+ // Modify copies to have the staging offset in mind
+ copy.dst_offset += download_staging.offset;
+ }
+ runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
+ runtime.Finish();
+ for (const BufferCopy& copy : copies) {
+ const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ // Undo the modified offset
+ const u64 dst_offset = copy.dst_offset - download_staging.offset;
+ const u8* copy_mapped_memory = mapped_memory + dst_offset;
+ cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
+ }
+ } else {
+ const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+ for (const BufferCopy& copy : copies) {
+ buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+ const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
+ }
+ }
+}
+
+template <class P>
void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
const auto scalar_replace = [buffer_id](Binding& binding) {
if (binding.buffer_id == buffer_id) {
@@ -1236,6 +1310,7 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
Unregister(buffer_id);
delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
+ slot_buffers.erase(buffer_id);
NotifyBufferDeletion();
}
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 8a2a6c360..3e135a2a6 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -14,10 +14,18 @@ extern "C" {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4242) // conversion from 'type' to 'type', possible loss of data
+#pragma warning(disable : 4244) // conversion from 'type' to 'type', possible loss of data
+#endif
#include <libavcodec/avcodec.h>
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
}
namespace Tegra {
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 0a8b82f2b..5faf8c0f1 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -3,7 +3,28 @@
// Refer to the license.txt file included.
#include <array>
+
+extern "C" {
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+#ifdef _MSC_VER
+#pragma warning(disable : 4244) // conversion from 'type' to 'type', possible loss of data
+#pragma warning(push)
+#endif
+#include <libswscale/swscale.h>
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+}
+
#include "common/assert.h"
+#include "common/logging/log.h"
+
#include "video_core/command_classes/nvdec.h"
#include "video_core/command_classes/vic.h"
#include "video_core/engines/maxwell_3d.h"
@@ -11,10 +32,6 @@
#include "video_core/memory_manager.h"
#include "video_core/textures/decoders.h"
-extern "C" {
-#include <libswscale/swscale.h>
-}
-
namespace Tegra {
Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ffed42a29..335383955 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -242,6 +242,7 @@ public:
return 4;
default:
UNREACHABLE();
+ return 1;
}
}
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index eaba1b103..c37f15bfd 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -11,12 +11,8 @@
#define UNIFORM(n)
#define BINDING_INPUT_BUFFER 0
#define BINDING_ENC_BUFFER 1
-#define BINDING_6_TO_8_BUFFER 2
-#define BINDING_7_TO_8_BUFFER 3
-#define BINDING_8_TO_8_BUFFER 4
-#define BINDING_BYTE_TO_16_BUFFER 5
-#define BINDING_SWIZZLE_BUFFER 6
-#define BINDING_OUTPUT_IMAGE 7
+#define BINDING_SWIZZLE_BUFFER 2
+#define BINDING_OUTPUT_IMAGE 3
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
@@ -26,10 +22,6 @@
#define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1
#define BINDING_ENC_BUFFER 2
-#define BINDING_6_TO_8_BUFFER 3
-#define BINDING_7_TO_8_BUFFER 4
-#define BINDING_8_TO_8_BUFFER 5
-#define BINDING_BYTE_TO_16_BUFFER 6
#define BINDING_OUTPUT_IMAGE 0
#endif
@@ -76,19 +68,6 @@ layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 {
layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues {
EncodingData encoding_values[];
};
-// ASTC Precompiled tables
-layout(binding = BINDING_6_TO_8_BUFFER, std430) readonly buffer REPLICATE_6_BIT_TO_8 {
- uint REPLICATE_6_BIT_TO_8_TABLE[];
-};
-layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_TO_8 {
- uint REPLICATE_7_BIT_TO_8_TABLE[];
-};
-layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 {
- uint REPLICATE_8_BIT_TO_8_TABLE[];
-};
-layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 {
- uint REPLICATE_BYTE_TO_16_TABLE[];
-};
layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
@@ -139,6 +118,19 @@ const uint REPLICATE_4_BIT_TO_6_TABLE[16] =
const uint REPLICATE_5_BIT_TO_6_TABLE[32] =
uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45,
47, 49, 51, 53, 55, 57, 59, 61, 63);
+const uint REPLICATE_6_BIT_TO_8_TABLE[64] =
+ uint[](0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 65, 69, 73, 77, 81, 85, 89,
+ 93, 97, 101, 105, 109, 113, 117, 121, 125, 130, 134, 138, 142, 146, 150, 154, 158, 162,
+ 166, 170, 174, 178, 182, 186, 190, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235,
+ 239, 243, 247, 251, 255);
+const uint REPLICATE_7_BIT_TO_8_TABLE[128] =
+ uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44,
+ 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88,
+ 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
+ 129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159, 161, 163,
+ 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199,
+ 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235,
+ 237, 239, 241, 243, 245, 247, 249, 251, 253, 255);
// Input ASTC texture globals
uint current_index = 0;
@@ -207,8 +199,7 @@ uint Replicate(uint val, uint num_bits, uint to_bit) {
}
uvec4 ReplicateByteTo16(uvec4 value) {
- return uvec4(REPLICATE_BYTE_TO_16_TABLE[value.x], REPLICATE_BYTE_TO_16_TABLE[value.y],
- REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]);
+ return value * 0x101;
}
uint ReplicateBitTo7(uint value) {
@@ -236,7 +227,7 @@ uint FastReplicateTo8(uint value, uint num_bits) {
case 7:
return REPLICATE_7_BIT_TO_8_TABLE[value];
case 8:
- return REPLICATE_8_BIT_TO_8_TABLE[value];
+ return value;
}
return Replicate(value, num_bits, 8);
}
@@ -1327,6 +1318,9 @@ void main() {
offset += swizzle;
const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1));
+ if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
+ return;
+ }
uint block_index =
pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index f968b5b16..07939432f 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -4,10 +4,10 @@
#pragma once
-#include <atomic>
#include <functional>
#include <optional>
#include <span>
+#include <stop_token>
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
@@ -123,7 +123,7 @@ public:
virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
/// Initialize disk cached resources for the game being emulated
- virtual void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
+ virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const DiskResourceLoadCallback& callback) {}
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index 3e4d88c30..e8d8d2aa5 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -454,7 +454,7 @@ private:
template <typename... Args>
void AddExpression(std::string_view text, Args&&... args) {
- shader_source += fmt::format(text, std::forward<Args>(args)...);
+ shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...);
}
template <typename... Args>
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f87bb269b..eb8bdaa85 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -351,7 +351,7 @@ void RasterizerOpenGL::SetupShaders(bool is_indexed) {
}
}
-void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
+void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
shader_cache.LoadDiskCache(title_id, stop_loading, callback);
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 76298517f..9995a563b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -94,7 +94,7 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
- void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
+ void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
/// Returns true when there are commands queued to the OpenGL server.
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 5cf7cd151..5a01c59ec 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -331,7 +331,7 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
-void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
+void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
disk_cache.BindTitleID(title_id);
const std::optional transferable = disk_cache.LoadTransferable();
@@ -372,7 +372,7 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop
const auto scope = context->Acquire();
for (std::size_t i = begin; i < end; ++i) {
- if (stop_loading) {
+ if (stop_loading.stop_requested()) {
return;
}
const auto& entry = (*transferable)[i];
@@ -435,7 +435,7 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop
precompiled_cache_altered = true;
return;
}
- if (stop_loading) {
+ if (stop_loading.stop_requested()) {
return;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 2aed0697e..b30308b6f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -127,7 +127,7 @@ public:
~ShaderCacheOpenGL() override;
/// Loads disk cache for the current game
- void LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
+ void LoadDiskCache(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
/// Gets the current specified shader stage program
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index ac78d344c..9c28498e8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -96,7 +96,7 @@ public:
// etc).
template <typename... Args>
void AddLine(std::string_view text, Args&&... args) {
- AddExpression(fmt::format(text, std::forward<Args>(args)...));
+ AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...));
AddNewLine();
}
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 6dbb6bfba..2e67922a6 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -12,12 +12,15 @@
#include <glad/glad.h>
#include "common/common_types.h"
+#include "common/literals.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
+using namespace Common::Literals;
+
class StreamBuffer {
- static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
+ static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
static constexpr size_t NUM_SYNCS = 16;
static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
static constexpr size_t MAX_ALIGNMENT = 256;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 9b4038615..23948feed 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -737,6 +737,8 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
}
}
+Image::~Image() = default;
+
void Image::UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index df8be12ff..25fe61566 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -143,6 +143,14 @@ public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
+ ~Image();
+
+ Image(const Image&) = delete;
+ Image& operator=(const Image&) = delete;
+
+ Image(Image&&) = default;
+ Image& operator=(Image&&) = default;
+
void UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
@@ -235,6 +243,7 @@ struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true;
+ static constexpr bool HAS_DEVICE_MEMORY_INFO = false;
using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 47fddcb6e..abaf1ee6a 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -69,7 +69,8 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
swizzle_table_buffer.Create();
astc_buffer.Create();
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
- glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_BUFFER_DATA), &ASTC_BUFFER_DATA, 0);
+ glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES,
+ 0);
}
UtilShaders::~UtilShaders() = default;
@@ -79,12 +80,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_ENC_BUFFER = 2;
-
- static constexpr GLuint BINDING_6_TO_8_BUFFER = 3;
- static constexpr GLuint BINDING_7_TO_8_BUFFER = 4;
- static constexpr GLuint BINDING_8_TO_8_BUFFER = 5;
- static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6;
-
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
const Extent2D tile_size{
@@ -93,21 +88,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
};
program_manager.BindHostCompute(astc_decoder_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle,
- offsetof(AstcBufferData, encoding_values),
- sizeof(AstcBufferData::encoding_values));
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_6_TO_8_BUFFER, astc_buffer.handle,
- offsetof(AstcBufferData, replicate_6_to_8),
- sizeof(AstcBufferData::replicate_6_to_8));
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_7_TO_8_BUFFER, astc_buffer.handle,
- offsetof(AstcBufferData, replicate_7_to_8),
- sizeof(AstcBufferData::replicate_7_to_8));
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle,
- offsetof(AstcBufferData, replicate_8_to_8),
- sizeof(AstcBufferData::replicate_8_to_8));
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle,
- offsetof(AstcBufferData, replicate_byte_to_16),
- sizeof(AstcBufferData::replicate_byte_to_16));
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(1, tile_size.width, tile_size.height);
@@ -137,6 +118,12 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}
+ // Precautionary barrier to ensure the compute shader is done decoding prior to texture access.
+ // GL_TEXTURE_FETCH_BARRIER_BIT and GL_SHADER_IMAGE_ACCESS_BARRIER_BIT are used in a separate
+ // glMemoryBarrier call by the texture cache runtime
+ glMemoryBarrier(GL_UNIFORM_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT |
+ GL_TEXTURE_UPDATE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT |
+ GL_SHADER_STORAGE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
program_manager.RestoreGuestCompute();
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 8cb65e588..0df4e1a1c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -55,8 +55,9 @@ size_t BytesPerIndex(VkIndexType index_type) {
template <typename T>
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
- std::ranges::transform(indices, indices.begin(),
- [quad, first](u32 index) { return first + index + quad * 4; });
+ for (T& index : indices) {
+ index = static_cast<T>(first + index + quad * 4);
+ }
return indices;
}
} // Anonymous namespace
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index e11406e58..205cd3b05 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -30,19 +30,16 @@
namespace Vulkan {
using Tegra::Texture::SWIZZLE_TABLE;
-using Tegra::Texture::ASTC::EncodingsValues;
+using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES;
using namespace Tegra::Texture::ASTC;
namespace {
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
-constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2;
-constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3;
-constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4;
-constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5;
-constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6;
-constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7;
+constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
+constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
+constexpr size_t ASTC_NUM_BINDINGS = 4;
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
return {
@@ -71,7 +68,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBinding
}};
}
-std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() {
+std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() {
return {{
{
.binding = ASTC_BINDING_INPUT_BUFFER,
@@ -88,34 +85,6 @@ std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() {
.pImmutableSamplers = nullptr,
},
{
- .binding = ASTC_BINDING_6_TO_8_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_7_TO_8_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_8_TO_8_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_BYTE_TO_16_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
.binding = ASTC_BINDING_SWIZZLE_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
@@ -143,7 +112,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
};
}
-std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() {
+std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
+BuildASTCPassDescriptorUpdateTemplateEntry() {
return {{
{
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
@@ -162,38 +132,6 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT
.stride = sizeof(DescriptorUpdateEntry),
},
{
- .dstBinding = ASTC_BINDING_6_TO_8_BUFFER,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = ASTC_BINDING_6_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
- .stride = sizeof(DescriptorUpdateEntry),
- },
- {
- .dstBinding = ASTC_BINDING_7_TO_8_BUFFER,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = ASTC_BINDING_7_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
- .stride = sizeof(DescriptorUpdateEntry),
- },
- {
- .dstBinding = ASTC_BINDING_8_TO_8_BUFFER,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
- .stride = sizeof(DescriptorUpdateEntry),
- },
- {
- .dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry),
- .stride = sizeof(DescriptorUpdateEntry),
- },
- {
.dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
.dstArrayElement = 0,
.descriptorCount = 1,
@@ -222,15 +160,6 @@ struct AstcPushConstants {
u32 block_height_mask;
};
-struct AstcBufferData {
- decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE;
- decltype(EncodingsValues) encoding_values = EncodingsValues;
- decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
- decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
- decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
- decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
-} constexpr ASTC_BUFFER_DATA;
-
} // Anonymous namespace
VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
@@ -423,7 +352,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
ASTCDecoderPass::~ASTCDecoderPass() = default;
void ASTCDecoderPass::MakeDataBuffer() {
- constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_BUFFER_DATA) + sizeof(SWIZZLE_TABLE);
+ constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE);
data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
@@ -437,9 +366,10 @@ void ASTCDecoderPass::MakeDataBuffer() {
data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
- std::memcpy(staging_ref.mapped_span.data(), &ASTC_BUFFER_DATA, sizeof(ASTC_BUFFER_DATA));
+ std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES,
+ sizeof(ASTC_ENCODINGS_VALUES));
// Tack on the swizzle table at the end of the buffer
- std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_BUFFER_DATA), &SWIZZLE_TABLE,
+ std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE,
sizeof(SWIZZLE_TABLE));
scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
@@ -509,18 +439,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
- update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values),
- sizeof(AstcBufferData::encoding_values));
- update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8),
- sizeof(AstcBufferData::replicate_6_to_8));
- update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_7_to_8),
- sizeof(AstcBufferData::replicate_7_to_8));
- update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8),
- sizeof(AstcBufferData::replicate_8_to_8));
- update_descriptor_queue.AddBuffer(*data_buffer,
- offsetof(AstcBufferData, replicate_byte_to_16),
- sizeof(AstcBufferData::replicate_byte_to_16));
- update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData),
+ update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES));
+ update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
sizeof(SWIZZLE_TABLE));
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
@@ -569,6 +489,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
});
+ scheduler.Finish();
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index db78ce3d9..6852c11b0 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -2,8 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <atomic>
-#include <chrono>
+#include <thread>
#include "common/settings.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
@@ -12,8 +11,6 @@
namespace Vulkan {
-using namespace std::chrono_literals;
-
MasterSemaphore::MasterSemaphore(const Device& device) {
static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
@@ -34,9 +31,9 @@ MasterSemaphore::MasterSemaphore(const Device& device) {
// Validation layers have a bug where they fail to track resource usage when using timeline
// semaphores and synchronizing with GetSemaphoreCounterValueKHR. To workaround this issue, have
// a separate thread waiting for each timeline semaphore value.
- debug_thread = std::thread([this] {
+ debug_thread = std::jthread([this](std::stop_token stop_token) {
u64 counter = 0;
- while (!shutdown) {
+ while (!stop_token.stop_requested()) {
if (semaphore.Wait(counter, 10'000'000)) {
++counter;
}
@@ -44,13 +41,6 @@ MasterSemaphore::MasterSemaphore(const Device& device) {
});
}
-MasterSemaphore::~MasterSemaphore() {
- shutdown = true;
-
- // This thread might not be started
- if (debug_thread.joinable()) {
- debug_thread.join();
- }
-}
+MasterSemaphore::~MasterSemaphore() = default;
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 4b6d64daa..ee3cd35d0 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -65,11 +65,10 @@ public:
}
private:
- vk::Semaphore semaphore; ///< Timeline semaphore.
- std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
- std::atomic<u64> current_tick{1}; ///< Current logical tick.
- std::atomic<bool> shutdown{false}; ///< True when the object is being destroyed.
- std::thread debug_thread; ///< Debug thread to workaround validation layer bugs.
+ vk::Semaphore semaphore; ///< Timeline semaphore.
+ std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
+ std::atomic<u64> current_tick{1}; ///< Current logical tick.
+ std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs.
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 7a1232497..0412b5234 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -12,6 +12,7 @@
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
+#include "common/literals.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/vulkan_common/vulkan_device.h"
@@ -19,12 +20,15 @@
namespace Vulkan {
namespace {
+
+using namespace Common::Literals;
+
// Maximum potential alignment of a Vulkan buffer
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
// Maximum size to put elements in the stream buffer
-constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
+constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
// Stream buffer size in bytes
-constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
+constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
constexpr VkMemoryPropertyFlags HOST_FLAGS =
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index a09fe084e..7b4875d0e 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -10,6 +10,7 @@
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/literals.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/vulkan_common/vulkan_device.h"
@@ -19,6 +20,8 @@ namespace Vulkan {
namespace {
+using namespace Common::Literals;
+
constexpr VkBufferUsageFlags BUFFER_USAGE =
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
@@ -26,7 +29,7 @@ constexpr VkBufferUsageFlags BUFFER_USAGE =
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
-constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
+constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256_MiB;
/// Find a memory type with the passed requirements
std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 52860b4cf..a2ab4d1ee 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -818,6 +818,10 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
});
}
+u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
+ return device.GetDeviceLocalMemory();
+}
+
Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
@@ -876,6 +880,8 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
}
}
+Image::~Image() = default;
+
void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
// TODO: Move this to another API
scheduler->RequestOutsideRenderPassOperationContext();
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 4a57d378b..172bcdf98 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -97,6 +97,8 @@ struct TextureCacheRuntime {
// All known Vulkan drivers can natively handle BGR textures
return true;
}
+
+ u64 GetDeviceLocalMemory() const;
};
class Image : public VideoCommon::ImageBase {
@@ -104,6 +106,14 @@ public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
+ ~Image();
+
+ Image(const Image&) = delete;
+ Image& operator=(const Image&) = delete;
+
+ Image(Image&&) = default;
+ Image& operator=(Image&&) = default;
+
void UploadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
@@ -257,6 +267,7 @@ struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = false;
static constexpr bool HAS_EMULATED_COPIES = false;
+ static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
using Runtime = Vulkan::TextureCacheRuntime;
using Image = Vulkan::Image;
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 6308aef94..eb1746265 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -283,4 +283,11 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
}
+u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format) {
+ constexpr u64 RGBA8_PIXEL_SIZE = 4;
+ const u64 base_block_size = static_cast<u64>(DefaultBlockWidth(format)) *
+ static_cast<u64>(DefaultBlockHeight(format)) * RGBA8_PIXEL_SIZE;
+ return (base_size * base_block_size) / BytesPerBlock(format);
+}
+
} // namespace VideoCore::Surface
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index c40ab89d0..1503db81f 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -462,4 +462,6 @@ bool IsPixelFormatSRGB(PixelFormat format);
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
+u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format);
+
} // namespace VideoCore::Surface
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 9914926b3..ad69d32d1 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -113,6 +113,43 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie
image_view_ids.push_back(image_view_id);
}
+bool ImageBase::IsSafeDownload() const noexcept {
+ // Skip images that were not modified from the GPU
+ if (False(flags & ImageFlagBits::GpuModified)) {
+ return false;
+ }
+ // Skip images that .are. modified from the CPU
+ // We don't want to write sensitive data from the guest
+ if (True(flags & ImageFlagBits::CpuModified)) {
+ return false;
+ }
+ if (info.num_samples > 1) {
+ LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+ return false;
+ }
+ return true;
+}
+
+void ImageBase::CheckBadOverlapState() {
+ if (False(flags & ImageFlagBits::BadOverlap)) {
+ return;
+ }
+ if (!overlapping_images.empty()) {
+ return;
+ }
+ flags &= ~ImageFlagBits::BadOverlap;
+}
+
+void ImageBase::CheckAliasState() {
+ if (False(flags & ImageFlagBits::Alias)) {
+ return;
+ }
+ if (!aliased_images.empty()) {
+ return;
+ }
+ flags &= ~ImageFlagBits::Alias;
+}
+
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
ASSERT(lhs.info.type == rhs.info.type);
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index b7f3b7e43..e326cab71 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 {
Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
+
+ // Garbage Collection Flags
+ BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
+ ///< garbage collection priority
+ Alias = 1 << 9, ///< This image has aliases and has priority on garbage
+ ///< collection
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
@@ -44,11 +50,16 @@ struct ImageBase {
void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
+ [[nodiscard]] bool IsSafeDownload() const noexcept;
+
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
}
+ void CheckBadOverlapState();
+ void CheckAliasState();
+
ImageInfo info;
u32 guest_size_bytes = 0;
@@ -72,6 +83,7 @@ struct ImageBase {
std::vector<SubresourceBase> slice_subresources;
std::vector<AliasedImage> aliased_images;
+ std::vector<ImageId> overlapping_images;
};
struct ImageAllocBase {
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index eae3be6ea..6180b8c0e 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <bit>
#include <concepts>
#include <numeric>
#include <type_traits>
@@ -32,6 +33,60 @@ template <class T>
requires std::is_nothrow_move_assignable_v<T>&&
std::is_nothrow_move_constructible_v<T> class SlotVector {
public:
+ class Iterator {
+ friend SlotVector<T>;
+
+ public:
+ constexpr Iterator() = default;
+
+ Iterator& operator++() noexcept {
+ const u64* const bitset = slot_vector->stored_bitset.data();
+ const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
+ if (id.index < size) {
+ do {
+ ++id.index;
+ } while (id.index < size && !IsValid(bitset));
+ if (id.index == size) {
+ id.index = SlotId::INVALID_INDEX;
+ }
+ }
+ return *this;
+ }
+
+ Iterator operator++(int) noexcept {
+ const Iterator copy{*this};
+ ++*this;
+ return copy;
+ }
+
+ bool operator==(const Iterator& other) const noexcept {
+ return id.index == other.id.index;
+ }
+
+ bool operator!=(const Iterator& other) const noexcept {
+ return id.index != other.id.index;
+ }
+
+ std::pair<SlotId, T*> operator*() const noexcept {
+ return {id, std::addressof((*slot_vector)[id])};
+ }
+
+ T* operator->() const noexcept {
+ return std::addressof((*slot_vector)[id]);
+ }
+
+ private:
+ Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
+ : slot_vector{slot_vector_}, id{id_} {}
+
+ bool IsValid(const u64* bitset) const noexcept {
+ return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
+ }
+
+ SlotVector<T>* slot_vector;
+ SlotId id;
+ };
+
~SlotVector() noexcept {
size_t index = 0;
for (u64 bits : stored_bitset) {
@@ -70,6 +125,20 @@ public:
ResetStorageBit(id.index);
}
+ [[nodiscard]] Iterator begin() noexcept {
+ const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
+ if (it == stored_bitset.end()) {
+ return end();
+ }
+ const u32 word_index = static_cast<u32>(std::distance(it, stored_bitset.begin()));
+ const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
+ return Iterator(this, first_id);
+ }
+
+ [[nodiscard]] Iterator end() noexcept {
+ return Iterator(this, SlotId{SlotId::INVALID_INDEX});
+ }
+
private:
struct NonTrivialDummy {
NonTrivialDummy() noexcept {}
@@ -140,7 +209,6 @@ private:
Entry* values = nullptr;
size_t values_capacity = 0;
- size_t values_size = 0;
std::vector<u64> stored_bitset;
std::vector<u32> free_list;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 59b7c678b..c7cfd02b6 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -19,9 +19,10 @@
#include <boost/container/small_vector.hpp>
#include "common/alignment.h"
-#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "common/literals.h"
#include "common/logging/log.h"
+#include "common/settings.h"
#include "video_core/compatible_formats.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/dirty_flags.h"
@@ -57,6 +58,7 @@ using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::SurfaceType;
+using namespace Common::Literals;
template <class P>
class TextureCache {
@@ -69,12 +71,17 @@ class TextureCache {
static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
/// True when some copies have to be emulated
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
+ /// True when the API can provide info about the memory of the device.
+ static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// Image view ID for null descriptors
static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
/// Sampler ID for bugged sampler ids
static constexpr SamplerId NULL_SAMPLER_ID{0};
+ static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
+ static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
+
using Runtime = typename P::Runtime;
using Image = typename P::Image;
using ImageAlloc = typename P::ImageAlloc;
@@ -197,6 +204,9 @@ private:
}
}
+ /// Runs the Garbage Collector.
+ void RunGarbageCollector();
+
/// Fills image_view_ids in the image views in indices
void FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
@@ -333,6 +343,10 @@ private:
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
bool has_deleted_images = false;
+ u64 total_used_memory = 0;
+ u64 minimum_memory;
+ u64 expected_memory;
+ u64 critical_memory;
SlotVector<Image> slot_images;
SlotVector<ImageView> slot_image_views;
@@ -353,6 +367,7 @@ private:
u64 modification_tick = 0;
u64 frame_tick = 0;
+ typename SlotVector<Image>::Iterator deletion_iterator;
};
template <class P>
@@ -373,11 +388,94 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
// This way the null resource becomes a compile time constant
void(slot_image_views.insert(runtime, NullImageParams{}));
void(slot_samplers.insert(runtime, sampler_descriptor));
+
+ deletion_iterator = slot_images.begin();
+
+ if constexpr (HAS_DEVICE_MEMORY_INFO) {
+ const auto device_memory = runtime.GetDeviceLocalMemory();
+ const u64 possible_expected_memory = (device_memory * 3) / 10;
+ const u64 possible_critical_memory = (device_memory * 6) / 10;
+ expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
+ critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
+ minimum_memory = 0;
+ } else {
+ // on OGL we can be more conservatives as the driver takes care.
+ expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
+ critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
+ minimum_memory = expected_memory;
+ }
+}
+
+template <class P>
+void TextureCache<P>::RunGarbageCollector() {
+ const bool high_priority_mode = total_used_memory >= expected_memory;
+ const bool aggressive_mode = total_used_memory >= critical_memory;
+ const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
+ int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
+ for (; num_iterations > 0; --num_iterations) {
+ if (deletion_iterator == slot_images.end()) {
+ deletion_iterator = slot_images.begin();
+ if (deletion_iterator == slot_images.end()) {
+ break;
+ }
+ }
+ auto [image_id, image_tmp] = *deletion_iterator;
+ Image* image = image_tmp; // fix clang error.
+ const bool is_alias = True(image->flags & ImageFlagBits::Alias);
+ const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
+ const bool must_download = image->IsSafeDownload();
+ bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
+ const u64 ticks_needed =
+ is_bad_overlap
+ ? ticks_to_destroy >> 4
+ : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
+ should_care |= aggressive_mode;
+ if (should_care && image->frame_tick + ticks_needed < frame_tick) {
+ if (is_bad_overlap) {
+ const bool overlap_check = std::ranges::all_of(
+ image->overlapping_images, [&, image](const ImageId& overlap_id) {
+ auto& overlap = slot_images[overlap_id];
+ return overlap.frame_tick >= image->frame_tick;
+ });
+ if (!overlap_check) {
+ ++deletion_iterator;
+ continue;
+ }
+ }
+ if (!is_bad_overlap && must_download) {
+ const bool alias_check = std::ranges::none_of(
+ image->aliased_images, [&, image](const AliasedImage& alias) {
+ auto& alias_image = slot_images[alias.id];
+ return (alias_image.frame_tick < image->frame_tick) ||
+ (alias_image.modification_tick < image->modification_tick);
+ });
+
+ if (alias_check) {
+ auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
+ const auto copies = FullDownloadCopies(image->info);
+ image->DownloadMemory(map, copies);
+ runtime.Finish();
+ SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
+ }
+ }
+ if (True(image->flags & ImageFlagBits::Tracked)) {
+ UntrackImage(*image);
+ }
+ UnregisterImage(image_id);
+ DeleteImage(image_id);
+ if (is_bad_overlap) {
+ ++num_iterations;
+ }
+ }
+ ++deletion_iterator;
+ }
}
template <class P>
void TextureCache<P>::TickFrame() {
- // Tick sentenced resources in this order to ensure they are destroyed in the right order
+ if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) {
+ RunGarbageCollector();
+ }
sentenced_images.Tick();
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
@@ -568,17 +666,7 @@ template <class P>
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
std::vector<ImageId> images;
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
- // Skip images that were not modified from the GPU
- if (False(image.flags & ImageFlagBits::GpuModified)) {
- return;
- }
- // Skip images that .are. modified from the CPU
- // We don't want to write sensitive data from the guest
- if (True(image.flags & ImageFlagBits::CpuModified)) {
- return;
- }
- if (image.info.num_samples > 1) {
- LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+ if (!image.IsSafeDownload()) {
return;
}
image.flags &= ~ImageFlagBits::GpuModified;
@@ -967,6 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
std::vector<ImageId> overlap_ids;
std::vector<ImageId> left_aliased_ids;
std::vector<ImageId> right_aliased_ids;
+ std::vector<ImageId> bad_overlap_ids;
ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
if (info.type != overlap.info.type) {
return;
@@ -992,9 +1081,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
left_aliased_ids.push_back(overlap_id);
+ overlap.flags |= ImageFlagBits::Alias;
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
broken_views, native_bgr)) {
right_aliased_ids.push_back(overlap_id);
+ overlap.flags |= ImageFlagBits::Alias;
+ } else {
+ bad_overlap_ids.push_back(overlap_id);
+ overlap.flags |= ImageFlagBits::BadOverlap;
}
});
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
@@ -1022,10 +1116,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
for (const ImageId aliased_id : right_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
+ new_image.flags |= ImageFlagBits::Alias;
}
for (const ImageId aliased_id : left_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
+ new_image.flags |= ImageFlagBits::Alias;
+ }
+ for (const ImageId aliased_id : bad_overlap_ids) {
+ ImageBase& aliased = slot_images[aliased_id];
+ aliased.overlapping_images.push_back(new_image_id);
+ new_image.overlapping_images.push_back(aliased_id);
+ new_image.flags |= ImageFlagBits::BadOverlap;
}
RegisterImage(new_image_id);
return new_image_id;
@@ -1195,6 +1297,13 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
image.flags |= ImageFlagBits::Registered;
ForEachPage(image.cpu_addr, image.guest_size_bytes,
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
+ u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
+ if ((IsPixelFormatASTC(image.info.format) &&
+ True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
+ True(image.flags & ImageFlagBits::Converted)) {
+ tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
+ }
+ total_used_memory += Common::AlignUp(tentative_size, 1024);
}
template <class P>
@@ -1203,6 +1312,14 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
"Trying to unregister an already registered image");
image.flags &= ~ImageFlagBits::Registered;
+ image.flags &= ~ImageFlagBits::BadOverlap;
+ u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
+ if ((IsPixelFormatASTC(image.info.format) &&
+ True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
+ True(image.flags & ImageFlagBits::Converted)) {
+ tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
+ }
+ total_used_memory -= Common::AlignUp(tentative_size, 1024);
ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
const auto page_it = page_table.find(page);
if (page_it == page_table.end()) {
@@ -1276,9 +1393,19 @@ void TextureCache<P>::DeleteImage(ImageId image_id) {
std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
return other_alias.id == image_id;
});
+ other_image.CheckAliasState();
ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
num_removed_aliases);
}
+ for (const ImageId overlap_id : image.overlapping_images) {
+ ImageBase& other_image = slot_images[overlap_id];
+ [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
+ other_image.overlapping_images,
+ [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
+ other_image.CheckBadOverlapState();
+ ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}",
+ num_removed_overlaps);
+ }
for (const ImageViewId image_view_id : image_view_ids) {
sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
slot_image_views.erase(image_view_id);
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 6835fd747..4efe042b6 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -581,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
const std::span<const u8> src = input.subspan(host_offset);
+ gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
+
SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
num_tiles.depth, block.height, block.depth);
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 9b2177ebd..7b756ba41 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -269,7 +269,7 @@ static void DecodeQuintBlock(InputBitStream& bits, IntegerEncodedVector& result,
static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange,
u32 nValues) {
// Determine encoding parameters
- IntegerEncodedValue val = EncodingsValues[maxRange];
+ IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[maxRange];
// Start decoding
u32 nValsDecoded = 0;
@@ -310,7 +310,7 @@ struct TexelWeightParams {
nIdxs *= 2;
}
- return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs);
+ return ASTC_ENCODINGS_VALUES[m_MaxWeight].GetBitLength(nIdxs);
}
u32 GetNumWeightValues() const {
@@ -551,6 +551,8 @@ static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
}
}
}
+
+static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
static constexpr u32 ReplicateByteTo16(std::size_t value) {
return REPLICATE_BYTE_TO_16_TABLE[value];
}
@@ -753,12 +755,12 @@ static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, co
// figure out the max value for each of them...
u32 range = 256;
while (--range > 0) {
- IntegerEncodedValue val = EncodingsValues[range];
+ IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[range];
u32 bitLength = val.GetBitLength(nValues);
if (bitLength <= nBitsForColorData) {
// Find the smallest possible range that matches the given encoding
while (--range > 0) {
- IntegerEncodedValue newval = EncodingsValues[range];
+ IntegerEncodedValue newval = ASTC_ENCODINGS_VALUES[range];
if (!newval.MatchesEncoding(val)) {
break;
}
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index c1c37dfe7..0229ae122 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -77,7 +77,7 @@ constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
return encodings;
}
-constexpr std::array<IntegerEncodedValue, 256> EncodingsValues = MakeEncodedValues();
+constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
// is the same as [(num_bits - 1):0] and repeats all the way down.
@@ -116,19 +116,10 @@ constexpr auto MakeReplicateTable() {
return table;
}
-constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
-struct AstcBufferData {
- decltype(EncodingsValues) encoding_values = EncodingsValues;
- decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
- decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
- decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
- decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
-} constexpr ASTC_BUFFER_DATA;
-
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
index f0ee76519..758c038ba 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
@@ -50,7 +50,7 @@ NsightAftermathTracker::NsightAftermathTracker() {
}
dump_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::LogDir) / "gpucrash";
- void(Common::FS::RemoveDirRecursively(dump_dir));
+ Common::FS::RemoveDirRecursively(dump_dir);
if (!Common::FS::CreateDir(dump_dir)) {
LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
return;
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index c52cd246b..f214510da 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -408,6 +408,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
+ CollectPhysicalMemoryInfo();
CollectTelemetryParameters();
CollectToolingInfo();
@@ -839,6 +840,17 @@ void Device::CollectTelemetryParameters() {
}
}
+void Device::CollectPhysicalMemoryInfo() {
+ const auto mem_properties = physical.GetMemoryProperties();
+ const size_t num_properties = mem_properties.memoryHeapCount;
+ device_access_memory = 0;
+ for (size_t element = 0; element < num_properties; ++element) {
+ if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
+ device_access_memory += mem_properties.memoryHeaps[element].size;
+ }
+ }
+}
+
void Device::CollectToolingInfo() {
if (!ext_tooling_info) {
return;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index c5504467d..96c0f8c60 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -228,6 +228,10 @@ public:
return use_asynchronous_shaders;
}
+ u64 GetDeviceLocalMemory() const {
+ return device_access_memory;
+ }
+
private:
/// Checks if the physical device is suitable.
void CheckSuitability(bool requires_swapchain) const;
@@ -247,6 +251,9 @@ private:
/// Collects information about attached tools.
void CollectToolingInfo();
+ /// Collects information about the device's local memory.
+ void CollectPhysicalMemoryInfo();
+
/// Returns a list of queue initialization descriptors.
std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
@@ -260,21 +267,22 @@ private:
bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const;
- VkInstance instance; ///< Vulkan instance.
- vk::DeviceDispatch dld; ///< Device function pointers.
- vk::PhysicalDevice physical; ///< Physical device.
- VkPhysicalDeviceProperties properties; ///< Device properties.
- vk::Device logical; ///< Logical device.
- vk::Queue graphics_queue; ///< Main graphics queue.
- vk::Queue present_queue; ///< Main present queue.
- u32 instance_version{}; ///< Vulkan onstance version.
- u32 graphics_family{}; ///< Main graphics queue family index.
- u32 present_family{}; ///< Main present queue family index.
- VkDriverIdKHR driver_id{}; ///< Driver ID.
- VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
- bool is_optimal_astc_supported{}; ///< Support for native ASTC.
- bool is_float16_supported{}; ///< Support for float16 arithmetics.
- bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
+ VkInstance instance; ///< Vulkan instance.
+ vk::DeviceDispatch dld; ///< Device function pointers.
+ vk::PhysicalDevice physical; ///< Physical device.
+ VkPhysicalDeviceProperties properties; ///< Device properties.
+ vk::Device logical; ///< Logical device.
+ vk::Queue graphics_queue; ///< Main graphics queue.
+ vk::Queue present_queue; ///< Main present queue.
+ u32 instance_version{}; ///< Vulkan onstance version.
+ u32 graphics_family{}; ///< Main graphics queue family index.
+ u32 present_family{}; ///< Main present queue family index.
+ VkDriverIdKHR driver_id{}; ///< Driver ID.
+ VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
+ u64 device_access_memory{}; ///< Total size of device local memory in bytes.
+ bool is_optimal_astc_supported{}; ///< Support for native ASTC.
+ bool is_float16_supported{}; ///< Support for float16 arithmetics.
+ bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index 5edd06ebc..aa173d19e 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -69,10 +69,10 @@ constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{
class MemoryAllocation {
public:
- explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties,
- u64 allocation_size_, u32 type)
- : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties},
- shifted_memory_type{1U << type} {}
+ explicit MemoryAllocation(MemoryAllocator* const allocator_, vk::DeviceMemory memory_,
+ VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type)
+ : allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
+ property_flags{properties}, shifted_memory_type{1U << type} {}
#if defined(_WIN32) || defined(__unix__)
~MemoryAllocation() {
@@ -106,6 +106,10 @@ public:
const auto it = std::ranges::find(commits, begin, &Range::begin);
ASSERT_MSG(it != commits.end(), "Invalid commit");
commits.erase(it);
+ if (commits.empty()) {
+ // Do not call any code involving 'this' after this call, the object will be destroyed
+ allocator->ReleaseMemory(this);
+ }
}
[[nodiscard]] std::span<u8> Map() {
@@ -171,6 +175,7 @@ private:
return candidate;
}
+ MemoryAllocator* const allocator; ///< Parent memory allocation.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const u64 allocation_size; ///< Size of this allocation.
const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
@@ -275,10 +280,17 @@ bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask,
return false;
}
}
- allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type));
+ allocations.push_back(
+ std::make_unique<MemoryAllocation>(this, std::move(memory), flags, size, type));
return true;
}
+void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {
+ const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr<MemoryAllocation>::get);
+ ASSERT(it != allocations.end());
+ allocations.erase(it);
+}
+
std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags flags) {
for (auto& allocation : allocations) {
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index db12d02f4..b61e931e0 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -69,6 +69,8 @@ private:
/// Memory allocator container.
/// Allocates and releases memory allocations on demand.
class MemoryAllocator {
+ friend MemoryAllocation;
+
public:
/**
* Construct memory allocator
@@ -104,6 +106,9 @@ private:
/// Tries to allocate a chunk of memory.
bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
+ /// Releases a chunk of memory.
+ void ReleaseMemory(MemoryAllocation* alloc);
+
/// Tries to allocate a memory commit.
std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags flags);