From 090bc588e5498c6a2cea136d28bfe43354aa2096 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Wed, 22 Feb 2023 00:26:07 -0500
Subject: texture_cache: Add async texture decoding
---
src/video_core/texture_cache/image_base.h | 3 +
src/video_core/texture_cache/texture_cache.h | 69 +++++++++++++++++++++++
src/video_core/texture_cache/texture_cache_base.h | 16 ++++++
3 files changed, 88 insertions(+)
(limited to 'src/video_core/texture_cache')
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 620565684..e8fa592d2 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -38,6 +38,9 @@ enum class ImageFlagBits : u32 {
Rescaled = 1 << 13,
CheckingRescalable = 1 << 14,
IsRescalable = 1 << 15,
+
+ AsynchronousDecode = 1 << 16,
+ IsDecoding = 1 << 17, ///< Is currently being decoded asynchronously.
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 3e2cbb0b0..4159bc796 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -85,6 +85,11 @@ void TextureCache
::RunGarbageCollector() {
}
--num_iterations;
auto& image = slot_images[image_id];
+ if (True(image.flags & ImageFlagBits::IsDecoding)) {
+ // This image is still being decoded, deleting it will invalidate the slot
+ // used by the async decoder thread.
+ return false;
+ }
const bool must_download =
image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
if (!high_priority_mode &&
@@ -133,6 +138,8 @@ void TextureCache
::TickFrame() {
sentenced_images.Tick();
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
+ TickAsyncDecode();
+
runtime.TickFrame();
critical_gc = 0;
++frame_tick;
@@ -777,6 +784,10 @@ void TextureCache
::RefreshContents(Image& image, ImageId image_id) {
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
return;
}
+ if (True(image.flags & ImageFlagBits::AsynchronousDecode)) {
+ QueueAsyncDecode(image, image_id);
+ return;
+ }
auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
UploadImageContents(image, staging);
runtime.InsertUploadMemoryBarrier();
@@ -989,6 +1000,64 @@ u64 TextureCache
::GetScaledImageSizeBytes(const ImageBase& image) {
return fitted_size;
}
+/// Starts an asynchronous decode of `image` on texture_decode_worker.
+///
+/// Guest memory is read and unswizzled on the calling thread; the conversion (ConvertImage) is
+/// queued on the worker. The result lands in a decode context owned by async_decodes, which
+/// TickAsyncDecode() uploads once the worker has marked it complete.
+///
+/// @param image    Image to decode. It is flagged IsDecoding here; TickAsyncDecode() clears it.
+/// @param image_id Slot id of `image`, stored so the image can be looked up again at upload time.
+template
+void TextureCache::QueueAsyncDecode(Image& image, ImageId image_id) {
+    UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted));
+
+    image.flags |= ImageFlagBits::IsDecoding;
+    auto decode = std::make_unique();
+    // The worker reaches the context through this raw pointer; ownership stays in async_decodes.
+    auto* decode_ptr = decode.get();
+    decode->image_id = image_id;
+    async_decodes.push_back(std::move(decode));
+
+    // The unswizzled data gets its own buffer because it is moved into the worker lambda below.
+    Common::ScratchBuffer local_unswizzle_data_buffer(image.unswizzled_size_bytes);
+    const size_t guest_size_bytes = image.guest_size_bytes;
+    swizzle_data_buffer.resize_destructive(guest_size_bytes);
+    gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes);
+    auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer,
+                                 local_unswizzle_data_buffer);
+    const size_t out_size = MapSizeBytes(image);
+
+    // `copies` is not used again in this function, so move it into the closure instead of
+    // copying the whole vector.
+    auto func = [out_size, copies = std::move(copies), info = image.info,
+                 input = std::move(local_unswizzle_data_buffer),
+                 async_decode = decode_ptr]() mutable {
+        async_decode->decoded_data.resize_destructive(out_size);
+        std::span copies_span{copies.data(), copies.size()};
+        ConvertImage(input, info, async_decode->decoded_data, copies_span);
+
+        // TODO: Do we need this lock?
+        std::unique_lock lock{async_decode->mutex};
+        async_decode->copies = std::move(copies);
+        // Set last: TickAsyncDecode() treats the context as ready once this is true.
+        async_decode->complete = true;
+    };
+    texture_decode_worker.QueueWork(std::move(func));
+}
+
+/// Uploads the results of finished asynchronous decodes; called from TickFrame().
+///
+/// Walks async_decodes. For every context whose worker has set `complete`, the decoded data is
+/// copied into a staging buffer and uploaded to the image in slot_images, the image's
+/// IsDecoding flag is cleared and the context is removed. Contexts that are not complete yet
+/// stay queued. One upload memory barrier is inserted afterwards if anything was uploaded.
+template
+void TextureCache::TickAsyncDecode() {
+    bool has_uploads{};
+    auto i = async_decodes.begin();
+    while (i != async_decodes.end()) {
+        auto* async_decode = i->get();
+        std::unique_lock lock{async_decode->mutex};
+        if (!async_decode->complete) {
+            ++i;
+            continue;
+        }
+        Image& image = slot_images[async_decode->image_id];
+        auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
+        std::memcpy(staging.mapped_span.data(), async_decode->decoded_data.data(),
+                    async_decode->decoded_data.size());
+        image.UploadMemory(staging, async_decode->copies);
+        image.flags &= ~ImageFlagBits::IsDecoding;
+        has_uploads = true;
+        // erase() destroys the context together with the mutex it contains. Release the lock
+        // first: otherwise the mutex is destroyed while still held and `lock` would unlock freed
+        // memory when it goes out of scope. The worker sets `complete` while holding this mutex,
+        // so having observed it under the lock means the worker has already released it.
+        lock.unlock();
+        i = async_decodes.erase(i);
+    }
+    if (has_uploads) {
+        runtime.InsertUploadMemoryBarrier();
+    }
+}
+
template
bool TextureCache::ScaleUp(Image& image) {
const bool has_copy = image.HasScaled();
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 485eaabaa..013836933 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -3,6 +3,7 @@
#pragma once
+#include
#include
#include
#include
@@ -18,6 +19,7 @@
#include "common/lru_cache.h"
#include "common/polyfill_ranges.h"
#include "common/scratch_buffer.h"
+#include "common/thread_worker.h"
#include "video_core/compatible_formats.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/delayed_destruction_ring.h"
@@ -54,6 +56,14 @@ struct ImageViewInOut {
ImageViewId id{};
};
+/// State shared between the texture cache and the texture decode worker for one queued decode.
+struct AsyncDecodeContext {
+    ImageId image_id;                   ///< Slot of the image that receives the decoded data.
+    Common::ScratchBuffer decoded_data; ///< Output of ConvertImage, written by the worker.
+    std::vector copies;                 ///< Copy regions passed to Image::UploadMemory.
+    std::mutex mutex;                   ///< Held while publishing and while consuming the result.
+    /// Set by the worker once decoded_data and copies are ready. Explicitly initialized so the
+    /// flag does not depend on how the context is constructed.
+    std::atomic_bool complete{false};
+};
+
using TextureCacheGPUMap = std::unordered_map, Common::IdentityHash>;
class TextureCacheChannelInfo : public ChannelInfo {
@@ -377,6 +387,9 @@ private:
bool ScaleDown(Image& image);
u64 GetScaledImageSizeBytes(const ImageBase& image);
+ void QueueAsyncDecode(Image& image, ImageId image_id);
+ void TickAsyncDecode();
+
Runtime& runtime;
VideoCore::RasterizerInterface& rasterizer;
@@ -430,6 +443,9 @@ private:
u64 modification_tick = 0;
u64 frame_tick = 0;
+
+ Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
+ std::vector> async_decodes;
};
} // namespace VideoCommon
--
cgit v1.2.3
From b5bcd8c71b2d5fd0528191990b4e11bc916b5d7a Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Wed, 22 Feb 2023 00:48:12 -0500
Subject: configuration: Add async ASTC decode setting
---
src/common/settings.cpp | 2 ++
src/common/settings.h | 1 +
src/video_core/renderer_opengl/gl_texture_cache.cpp | 17 ++++++++++++++---
src/video_core/renderer_vulkan/vk_texture_cache.cpp | 7 ++++---
src/video_core/texture_cache/texture_cache.h | 1 +
src/video_core/textures/astc.cpp | 4 ++--
src/yuzu/configuration/config.cpp | 2 ++
src/yuzu/configuration/configure_graphics_advanced.cpp | 7 +++++++
src/yuzu/configuration/configure_graphics_advanced.h | 1 +
src/yuzu/configuration/configure_graphics_advanced.ui | 10 ++++++++++
src/yuzu_cmd/config.cpp | 1 +
src/yuzu_cmd/default_ini.h | 4 ++++
12 files changed, 49 insertions(+), 8 deletions(-)
(limited to 'src/video_core/texture_cache')
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 49b41c158..70b02146b 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -59,6 +59,7 @@ void LogSettings() {
values.use_asynchronous_gpu_emulation.GetValue());
log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
+ log_setting("Renderer_AsyncASTC", values.async_astc.GetValue());
log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
@@ -212,6 +213,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.use_asynchronous_gpu_emulation.SetGlobal(true);
values.nvdec_emulation.SetGlobal(true);
values.accelerate_astc.SetGlobal(true);
+ values.async_astc.SetGlobal(true);
values.use_vsync.SetGlobal(true);
values.shader_backend.SetGlobal(true);
values.use_asynchronous_shaders.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index 6d27dd5ee..512ecff69 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -453,6 +453,7 @@ struct Values {
SwitchableSetting use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
SwitchableSetting nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
SwitchableSetting accelerate_astc{true, "accelerate_astc"};
+ SwitchableSetting async_astc{false, "async_astc"};
SwitchableSetting use_vsync{true, "use_vsync"};
SwitchableSetting shader_backend{ShaderBackend::GLSL, ShaderBackend::GLSL,
ShaderBackend::SPIRV, "shader_backend"};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index eb6e43a08..b047e7b3d 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -228,8 +228,9 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
- if (CanBeAccelerated(*runtime, info)) {
+ if (CanBeDecodedAsync(*runtime, info)) {
+ flags |= ImageFlagBits::AsynchronousDecode;
+ } else if (CanBeAccelerated(*runtime, info)) {
flags |= ImageFlagBits::AcceleratedUpload;
}
if (IsConverted(runtime->device, info.format, info.type)) {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 9b85dfb5e..80adb70eb 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1256,11 +1256,12 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)),
aspect_mask(ImageAspectMask(info.format)) {
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
- if (Settings::values.accelerate_astc.GetValue()) {
+ if (Settings::values.async_astc.GetValue()) {
+ flags |= VideoCommon::ImageFlagBits::AsynchronousDecode;
+ } else if (Settings::values.accelerate_astc.GetValue()) {
flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
- } else {
- flags |= VideoCommon::ImageFlagBits::Converted;
}
+ flags |= VideoCommon::ImageFlagBits::Converted;
flags |= VideoCommon::ImageFlagBits::CostlyLoad;
}
if (runtime->device.HasDebuggingToolAttached()) {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 4159bc796..9dd152fbe 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1003,6 +1003,7 @@ u64 TextureCache::GetScaledImageSizeBytes(const ImageBase& image) {
template
void TextureCache::QueueAsyncDecode(Image& image, ImageId image_id) {
UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted));
+ LOG_INFO(HW_GPU, "Queuing async texture decode");
image.flags |= ImageFlagBits::IsDecoding;
auto decode = std::make_unique();
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index e8d7c7863..4381eed1d 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -1656,8 +1656,8 @@ void Decompress(std::span data, uint32_t width, uint32_t height,
const u32 rows = Common::DivideUp(height, block_height);
const u32 cols = Common::DivideUp(width, block_width);
- Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2,
- "ASTCDecompress"};
+ static Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2,
+ "ASTCDecompress"};
for (u32 z = 0; z < depth; ++z) {
const u32 depth_offset = z * height * width * 4;
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index db68ed259..dd1c1e94a 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -707,6 +707,7 @@ void Config::ReadRendererValues() {
ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
ReadGlobalSetting(Settings::values.nvdec_emulation);
ReadGlobalSetting(Settings::values.accelerate_astc);
+ ReadGlobalSetting(Settings::values.async_astc);
ReadGlobalSetting(Settings::values.use_vsync);
ReadGlobalSetting(Settings::values.shader_backend);
ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
@@ -1350,6 +1351,7 @@ void Config::SaveRendererValues() {
static_cast(Settings::values.nvdec_emulation.GetDefault()),
Settings::values.nvdec_emulation.UsingGlobal());
WriteGlobalSetting(Settings::values.accelerate_astc);
+ WriteGlobalSetting(Settings::values.async_astc);
WriteGlobalSetting(Settings::values.use_vsync);
WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),
static_cast(Settings::values.shader_backend.GetValue(global)),
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index cc0155a2c..bbc363322 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -23,11 +23,13 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
const bool runtime_lock = !system.IsPoweredOn();
ui->use_vsync->setEnabled(runtime_lock);
ui->renderer_force_max_clock->setEnabled(runtime_lock);
+ ui->async_astc->setEnabled(runtime_lock);
ui->use_asynchronous_shaders->setEnabled(runtime_lock);
ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue());
ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
+ ui->async_astc->setChecked(Settings::values.async_astc.GetValue());
ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
ui->use_pessimistic_flushes->setChecked(Settings::values.use_pessimistic_flushes.GetValue());
@@ -60,6 +62,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy,
ui->anisotropic_filtering_combobox);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync);
+ ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc,
+ async_astc);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
ui->use_asynchronous_shaders,
use_asynchronous_shaders);
@@ -91,6 +95,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
ui->renderer_force_max_clock->setEnabled(
Settings::values.renderer_force_max_clock.UsingGlobal());
ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
+ ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal());
ui->use_asynchronous_shaders->setEnabled(
Settings::values.use_asynchronous_shaders.UsingGlobal());
ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
@@ -108,6 +113,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
Settings::values.renderer_force_max_clock,
renderer_force_max_clock);
ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync);
+ ConfigurationShared::SetColoredTristate(ui->async_astc, Settings::values.async_astc,
+ async_astc);
ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders,
Settings::values.use_asynchronous_shaders,
use_asynchronous_shaders);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index df557d585..bf1b04749 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -38,6 +38,7 @@ private:
ConfigurationShared::CheckState renderer_force_max_clock;
ConfigurationShared::CheckState use_vsync;
+ ConfigurationShared::CheckState async_astc;
ConfigurationShared::CheckState use_asynchronous_shaders;
ConfigurationShared::CheckState use_fast_gpu_time;
ConfigurationShared::CheckState use_pessimistic_flushes;
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 061885e30..a7dbdc18c 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -89,6 +89,16 @@
+ -
+
+
+ Enables asynchronous ASTC texture decoding, which may reduce load time stutter. This feature is experimental.
+
+
+ Decode ASTC textures asynchronously (Hack)
+
+
+
-
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 3b6dce296..464da3231 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -324,6 +324,7 @@ void Config::ReadValues() {
ReadSetting("Renderer", Settings::values.use_asynchronous_shaders);
ReadSetting("Renderer", Settings::values.nvdec_emulation);
ReadSetting("Renderer", Settings::values.accelerate_astc);
+ ReadSetting("Renderer", Settings::values.async_astc);
ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
ReadSetting("Renderer", Settings::values.use_pessimistic_flushes);
ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index cf3cc4c4e..20e403400 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -342,6 +342,10 @@ nvdec_emulation =
# 0: Off, 1 (default): On
accelerate_astc =
+# Decode ASTC textures asynchronously.
+# 0 (default): Off, 1: On
+async_astc =
+
# Turns on the speed limiter, which will limit the emulation speed to the desired speed limit value
# 0: Off, 1: On (default)
use_speed_limit =
--
cgit v1.2.3