diff options
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/async_shaders.cpp | 94 | ||||
| -rw-r--r-- | src/video_core/shader/async_shaders.h | 39 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.cpp | 30 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 6 |
4 files changed, 120 insertions, 49 deletions
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index b7f66d7ee..f815584f7 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <chrono> #include <condition_variable> #include <mutex> #include <thread> @@ -20,9 +19,18 @@ AsyncShaders::~AsyncShaders() { KillWorkers(); } -void AsyncShaders::AllocateWorkers(std::size_t num_workers) { - // If we're already have workers queued or don't want to queue workers, ignore - if (num_workers == worker_threads.size() || num_workers == 0) { +void AsyncShaders::AllocateWorkers() { + // Max worker threads we should allow + constexpr u32 MAX_THREADS = 4; + // Deduce how many threads we can use + const u32 threads_used = std::thread::hardware_concurrency() / 4; + // Always allow at least 1 thread regardless of our settings + const auto max_worker_count = std::max(1U, threads_used); + // Don't use more than MAX_THREADS + const auto num_workers = std::min(max_worker_count, MAX_THREADS); + + // If we already have workers queued, ignore + if (num_workers == worker_threads.size()) { return; } @@ -34,8 +42,8 @@ void AsyncShaders::AllocateWorkers(std::size_t num_workers) { // Create workers for (std::size_t i = 0; i < num_workers; i++) { context_list.push_back(emu_window.CreateSharedContext()); - worker_threads.push_back(std::move( - std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()))); + worker_threads.push_back( + std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get())); } } @@ -111,24 +119,50 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, VideoCommon::Shader::CompilerSettings compiler_settings, const VideoCommon::Shader::Registry& registry, VAddr cpu_addr) { - WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM - : AsyncShaders::Backend::OpenGL, - device, - shader_type, - uid, - std::move(code), - std::move(code_b), - main_offset, - compiler_settings, - registry, - cpu_addr}; + WorkerParams params{ + .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, + .device = &device, + .shader_type = shader_type, + .uid = uid, + .code = std::move(code), + .code_b = std::move(code_b), + .main_offset = main_offset, + .compiler_settings = compiler_settings, + .registry = registry, + .cpu_address = cpu_addr, + }; std::unique_lock lock(queue_mutex); - pending_queue.push_back(std::move(params)); + pending_queue.push(std::move(params)); + cv.notify_one(); +} + +void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, + const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, + Vulkan::VKDescriptorPool& descriptor_pool, + Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, + Vulkan::VKRenderPassCache& renderpass_cache, + std::vector<VkDescriptorSetLayoutBinding> bindings, + Vulkan::SPIRVProgram program, + Vulkan::GraphicsPipelineCacheKey key) { + WorkerParams params{ + .backend = Backend::Vulkan, + .pp_cache = pp_cache, + .vk_device = &device, + .scheduler = &scheduler, + .descriptor_pool = &descriptor_pool, + .update_descriptor_queue = &update_descriptor_queue, + .renderpass_cache = &renderpass_cache, + .bindings = bindings, + .program = program, + .key = key, + }; + + std::unique_lock lock(queue_mutex); + pending_queue.push(std::move(params)); cv.notify_one(); } void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { - using namespace std::chrono_literals; while (!is_thread_exiting.load(std::memory_order_relaxed)) { std::unique_lock lock{queue_mutex}; cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); @@ -144,18 +178,17 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context if (pending_queue.empty()) { continue; } + // Pull work from queue WorkerParams work = std::move(pending_queue.front()); - pending_queue.pop_front(); - + pending_queue.pop(); lock.unlock(); - if (work.backend == AsyncShaders::Backend::OpenGL || - work.backend == AsyncShaders::Backend::GLASM) { - const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry); + if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { + const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); const auto scope = context->Acquire(); auto program = - OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); + OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); Result result{}; result.backend = work.backend; result.cpu_address = work.cpu_address; @@ -164,9 +197,9 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context result.code_b = std::move(work.code_b); result.shader_type = work.shader_type; - if (work.backend == AsyncShaders::Backend::OpenGL) { + if (work.backend == Backend::OpenGL) { result.program.opengl = std::move(program->source_program); - } else if (work.backend == AsyncShaders::Backend::GLASM) { + } else if (work.backend == Backend::GLASM) { result.program.glasm = std::move(program->assembly_program); } @@ -174,6 +207,13 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context std::unique_lock complete_lock(completed_mutex); finished_work.push_back(std::move(result)); } + } else if (work.backend == Backend::Vulkan) { + auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( + *work.vk_device, *work.scheduler, *work.descriptor_pool, + *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, + work.program); + + work.pp_cache->EmplacePipeline(std::move(pipeline)); } } } diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 2f5ee94ad..d5ae814d5 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -14,6 +14,10 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" namespace Core::Frontend { class EmuWindow; @@ -24,6 +28,10 @@ namespace Tegra { class GPU; } +namespace Vulkan { +class VKPipelineCache; +} + namespace VideoCommon::Shader { class AsyncShaders { @@ -31,6 +39,7 @@ public: enum class Backend { OpenGL, GLASM, + Vulkan, }; struct ResultPrograms { @@ -52,7 +61,7 @@ public: ~AsyncShaders(); /// Start up shader worker threads - void AllocateWorkers(std::size_t num_workers); + void AllocateWorkers(); /// Clear the shader queue and kill all worker threads void FreeWorkers(); @@ -76,6 +85,14 @@ public: VideoCommon::Shader::CompilerSettings compiler_settings, const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); + void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, + Vulkan::VKScheduler& scheduler, + Vulkan::VKDescriptorPool& descriptor_pool, + Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, + Vulkan::VKRenderPassCache& renderpass_cache, + std::vector<VkDescriptorSetLayoutBinding> bindings, + Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); + private: void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); @@ -83,16 +100,28 @@ private: bool HasWorkQueued(); struct WorkerParams { - AsyncShaders::Backend backend; - OpenGL::Device device; + Backend backend; + // For OGL + const OpenGL::Device* device; Tegra::Engines::ShaderType shader_type; u64 uid; std::vector<u64> code; std::vector<u64> code_b; u32 main_offset; VideoCommon::Shader::CompilerSettings compiler_settings; - VideoCommon::Shader::Registry registry; + std::optional<VideoCommon::Shader::Registry> registry; VAddr cpu_address; + + // For Vulkan + Vulkan::VKPipelineCache* pp_cache; + const Vulkan::VKDevice* vk_device; + Vulkan::VKScheduler* scheduler; + Vulkan::VKDescriptorPool* descriptor_pool; + Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; + Vulkan::VKRenderPassCache* renderpass_cache; + std::vector<VkDescriptorSetLayoutBinding> bindings; + Vulkan::SPIRVProgram program; + Vulkan::GraphicsPipelineCacheKey key; }; std::condition_variable cv; @@ -101,7 +130,7 @@ private: std::atomic<bool> is_thread_exiting{}; std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; std::vector<std::thread> worker_threads; - std::deque<WorkerParams> pending_queue; + std::queue<WorkerParams> pending_queue; std::vector<AsyncShaders::Result> finished_work; Core::Frontend::EmuWindow& emu_window; }; diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 8d86020f6..336397cdb 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -187,24 +187,26 @@ std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, u64 ldc_tracked_register) { - return TrackInstruction<u64>(state, pos, - [ldc_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::SHL_IMM && - instr.gpr0.Value() == ldc_tracked_register; - }, - [](auto instr, const auto&) { return instr.gpr8.Value(); }); + return TrackInstruction<u64>( + state, pos, + [ldc_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::SHL_IMM && + instr.gpr0.Value() == ldc_tracked_register; + }, + [](auto instr, const auto&) { return instr.gpr8.Value(); }); } std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, u64 shl_tracked_register) { - return TrackInstruction<u32>(state, pos, - [shl_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::IMNMX_IMM && - instr.gpr0.Value() == shl_tracked_register; - }, - [](auto instr, const auto&) { - return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); - }); + return TrackInstruction<u32>( + state, pos, + [shl_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::IMNMX_IMM && + instr.gpr0.Value() == shl_tracked_register; + }, + [](auto instr, const auto&) { + return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); + }); } std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 63adbc4a3..e4739394d 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -471,9 +471,9 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& const auto [base_address, index, offset] = TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - ASSERT_OR_EXECUTE_MSG(base_address != nullptr, - { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, - "Global memory tracking failed"); + ASSERT_OR_EXECUTE_MSG( + base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, + "Global memory tracking failed"); bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); |
