From bd8b9bbcee93549f323352f227ff44d0e79e0ad4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 Feb 2020 16:13:47 -0300 Subject: gl_shader_cache: Rework shader cache and remove post-specializations Instead of pre-specializing shaders and then post-specializing them, drop the later and only "specialize" the shader while decoding it. --- src/video_core/shader/const_buffer_locker.cpp | 7 ++++--- src/video_core/shader/const_buffer_locker.h | 11 +++++------ src/video_core/shader/decode.cpp | 18 +++++------------- src/video_core/shader/track.cpp | 9 +++------ 4 files changed, 17 insertions(+), 28 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 0638be8cb..c859dd7ca 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -14,8 +14,9 @@ namespace VideoCommon::Shader { using Tegra::Engines::SamplerDescriptor; -ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) - : stage{shader_stage} {} +ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + VideoCore::GuestDriverProfile stored_guest_driver_profile) + : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine) @@ -97,7 +98,7 @@ void ConstBufferLocker::SetBoundBuffer(u32 buffer) { bool ConstBufferLocker::IsConsistent() const { if (!engine) { - return false; + return true; } return std::all_of(keys.begin(), keys.end(), [this](const auto& pair) { diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index d3ea11087..7c6f7bbdd 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -26,7 +26,8 @@ using BindlessSamplerMap = */ class ConstBufferLocker { public: - explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); + explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + VideoCore::GuestDriverProfile stored_guest_driver_profile); explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine); @@ -83,15 +84,13 @@ public: } /// Obtains access to the guest driver's profile. - VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { - if (engine) { - return &engine->AccessGuestDriverProfile(); - } - return nullptr; + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { + return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; } private: const Tegra::Engines::ShaderType stage; + VideoCore::GuestDriverProfile stored_guest_driver_profile; Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; KeyMap keys; BoundSamplerMap bound_samplers; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 6b697ed5d..af4490d66 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } -void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, +void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, const std::list& used_samplers) { - if (gpu_driver == nullptr) { - LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); - return; - } - if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { + if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { return; } u32 count{}; @@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, bound_offsets.emplace_back(sampler.GetOffset()); } if (count > 1) { - gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); + gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); } } std::optional TryDeduceSamplerSize(const Sampler& sampler_to_deduce, - VideoCore::GuestDriverProfile* gpu_driver, + VideoCore::GuestDriverProfile& gpu_driver, const std::list& used_samplers) { - if (gpu_driver == nullptr) { - LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); - return std::nullopt; - } const u32 base_offset = sampler_to_deduce.GetOffset(); u32 max_offset{std::numeric_limits::max()}; for (const auto& sampler : used_samplers) { @@ -77,7 +69,7 @@ std::optional TryDeduceSamplerSize(const Sampler& sampler_to_deduce, if (max_offset == std::numeric_limits::max()) { return std::nullopt; } - return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); + return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); } } // Anonymous namespace diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 15e22b9fa..b1a0aa00c 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -94,13 +94,10 @@ std::tuple ShaderIR::TrackBindlessSampler(Node tracked, cons } auto [gpr, base_offset] = *pair; const auto offset_inm = std::get_if(&*base_offset); - auto gpu_driver = locker.AccessGuestDriverProfile(); - if (gpu_driver == nullptr) { - return {}; - } + const auto& gpu_driver = locker.AccessGuestDriverProfile(); const u32 bindless_cv = NewCustomVariable(); - const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, - Immediate(gpu_driver->GetTextureHandlerSize())); + const Node op = + Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); const Node cv_node = GetCustomVariable(bindless_cv); Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); -- cgit v1.2.3 From e8efd5a90100a86899e31a4de0137e915e0e0366 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 28 Feb 2020 20:53:10 -0300 Subject: video_core: Rename "const buffer locker" to "registry" --- CMakeModules/GenerateSCMRev.cmake | 4 +- src/common/CMakeLists.txt | 4 +- src/video_core/CMakeLists.txt | 4 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 65 +++++------ src/video_core/renderer_opengl/gl_shader_cache.h | 8 +- .../renderer_opengl/gl_shader_disk_cache.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 4 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 +- src/video_core/shader/const_buffer_locker.cpp | 127 --------------------- src/video_core/shader/const_buffer_locker.h | 102 ----------------- src/video_core/shader/control_flow.cpp | 13 ++- src/video_core/shader/control_flow.h | 3 +- src/video_core/shader/decode.cpp | 4 +- src/video_core/shader/decode/texture.cpp | 5 +- src/video_core/shader/registry.cpp | 127 +++++++++++++++++++++ src/video_core/shader/registry.h | 102 +++++++++++++++++ src/video_core/shader/shader_ir.cpp | 5 +- src/video_core/shader/shader_ir.h | 6 +- src/video_core/shader/track.cpp | 4 +- 19 files changed, 299 insertions(+), 294 deletions(-) delete mode 100644 src/video_core/shader/const_buffer_locker.cpp delete mode 100644 src/video_core/shader/const_buffer_locker.h create mode 100644 src/video_core/shader/registry.cpp create mode 100644 src/video_core/shader/registry.h (limited to 'src/video_core/shader') diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 6c2f201eb..8c13a94fb 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -89,8 +89,6 @@ set(HASH_FILES "${VIDEO_CORE}/shader/ast.h" "${VIDEO_CORE}/shader/compiler_settings.cpp" "${VIDEO_CORE}/shader/compiler_settings.h" - "${VIDEO_CORE}/shader/const_buffer_locker.cpp" - "${VIDEO_CORE}/shader/const_buffer_locker.h" "${VIDEO_CORE}/shader/control_flow.cpp" "${VIDEO_CORE}/shader/control_flow.h" "${VIDEO_CORE}/shader/decode.cpp" @@ -99,6 +97,8 @@ set(HASH_FILES "${VIDEO_CORE}/shader/node.h" "${VIDEO_CORE}/shader/node_helper.cpp" "${VIDEO_CORE}/shader/node_helper.h" + "${VIDEO_CORE}/shader/registry.cpp" + "${VIDEO_CORE}/shader/registry.h" "${VIDEO_CORE}/shader/shader_ir.cpp" "${VIDEO_CORE}/shader/shader_ir.h" "${VIDEO_CORE}/shader/track.cpp" diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 274e4ec79..1f621fb1f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -70,8 +70,6 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/ast.h" "${VIDEO_CORE}/shader/compiler_settings.cpp" "${VIDEO_CORE}/shader/compiler_settings.h" - "${VIDEO_CORE}/shader/const_buffer_locker.cpp" - "${VIDEO_CORE}/shader/const_buffer_locker.h" "${VIDEO_CORE}/shader/control_flow.cpp" "${VIDEO_CORE}/shader/control_flow.h" "${VIDEO_CORE}/shader/decode.cpp" @@ -80,6 +78,8 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/node.h" "${VIDEO_CORE}/shader/node_helper.cpp" "${VIDEO_CORE}/shader/node_helper.h" + "${VIDEO_CORE}/shader/registry.cpp" + "${VIDEO_CORE}/shader/registry.h" "${VIDEO_CORE}/shader/shader_ir.cpp" "${VIDEO_CORE}/shader/shader_ir.h" "${VIDEO_CORE}/shader/track.cpp" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3d93c07fb..0101e5f0e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -116,8 +116,6 @@ add_library(video_core STATIC shader/ast.h shader/compiler_settings.cpp shader/compiler_settings.h - shader/const_buffer_locker.cpp - shader/const_buffer_locker.h shader/control_flow.cpp shader/control_flow.h shader/decode.cpp @@ -126,6 +124,8 @@ add_library(video_core STATIC shader/node_helper.cpp shader/node_helper.h shader/node.h + shader/registry.cpp + shader/registry.h shader/shader_ir.cpp shader/shader_ir.h shader/track.cpp diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e3a1d5a5f..87d25b5a5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -28,13 +28,14 @@ #include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/utils.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace OpenGL { using Tegra::Engines::ShaderType; -using VideoCommon::Shader::ConstBufferLocker; using VideoCommon::Shader::ProgramCode; +using VideoCommon::Shader::Registry; using VideoCommon::Shader::ShaderIR; namespace { @@ -163,22 +164,22 @@ std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); } -std::shared_ptr MakeLocker(const ShaderDiskCacheEntry& entry) { +std::shared_ptr MakeRegistry(const ShaderDiskCacheEntry& entry) { const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; - auto locker = std::make_shared(entry.type, guest_profile); - locker->SetBoundBuffer(entry.bound_buffer); + auto registry = std::make_shared(entry.type, guest_profile); + registry->SetBoundBuffer(entry.bound_buffer); for (const auto& [address, value] : entry.keys) { const auto [buffer, offset] = address; - locker->InsertKey(buffer, offset, value); + registry->InsertKey(buffer, offset, value); } for (const auto& [offset, sampler] : entry.bound_samplers) { - locker->InsertBoundSampler(offset, sampler); + registry->InsertBoundSampler(offset, sampler); } for (const auto& [key, sampler] : entry.bindless_samplers) { const auto [buffer, offset] = key; - locker->InsertBindlessSampler(buffer, offset, sampler); + registry->InsertBindlessSampler(buffer, offset, sampler); } - return locker; + return registry; } std::shared_ptr BuildShader(const Device& device, ShaderType shader_type, @@ -211,15 +212,15 @@ std::unordered_set GetSupportedFormats() { } // Anonymous namespace CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, - std::shared_ptr locker, + std::shared_ptr registry, ShaderEntries entries, std::shared_ptr program) - : RasterizerCacheObject{host_ptr}, locker{std::move(locker)}, entries{std::move(entries)}, + : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} CachedShader::~CachedShader() = default; GLuint CachedShader::GetHandle() const { - if (!locker->IsConsistent()) { + if (!registry->IsConsistent()) { std::abort(); } return program->handle; @@ -231,8 +232,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, const auto shader_type = GetShaderType(program_type); const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto locker = std::make_shared(shader_type, params.system.GPU().Maxwell3D()); - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *locker); + auto registry = std::make_shared(shader_type, params.system.GPU().Maxwell3D()); + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); // TODO(Rodrigo): Handle VertexA shaders // std::optional ir_b; // if (!code_b.empty()) { @@ -245,46 +246,46 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, entry.code = std::move(code); entry.code_b = std::move(code_b); entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = locker->GetBoundBuffer(); - entry.keys = locker->GetKeys(); - entry.bound_samplers = locker->GetBoundSamplers(); - entry.bindless_samplers = locker->GetBindlessSamplers(); + entry.bound_buffer = registry->GetBoundBuffer(); + entry.keys = registry->GetKeys(); + entry.bound_samplers = registry->GetBoundSamplers(); + entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); return std::shared_ptr(new CachedShader(params.host_ptr, params.cpu_addr, - size_in_bytes, std::move(locker), + size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); } Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto locker = std::make_shared(Tegra::Engines::ShaderType::Compute, - params.system.GPU().KeplerCompute()); - const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *locker); + auto registry = + std::make_shared(ShaderType::Compute, params.system.GPU().KeplerCompute()); + const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); auto program = BuildShader(params.device, ShaderType::Compute, params.unique_identifier, ir); ShaderDiskCacheEntry entry; entry.type = ShaderType::Compute; entry.code = std::move(code); entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = locker->GetBoundBuffer(); - entry.keys = locker->GetKeys(); - entry.bound_samplers = locker->GetBoundSamplers(); - entry.bindless_samplers = locker->GetBindlessSamplers(); + entry.bound_buffer = registry->GetBoundBuffer(); + entry.keys = registry->GetKeys(); + entry.bound_samplers = registry->GetBoundSamplers(); + entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); return std::shared_ptr(new CachedShader(params.host_ptr, params.cpu_addr, - size_in_bytes, std::move(locker), + size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); } Shader CachedShader::CreateFromCache(const ShaderParameters& params, const PrecompiledShader& precompiled_shader, std::size_t size_in_bytes) { - return std::shared_ptr( - new CachedShader(params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.locker, - precompiled_shader.entries, precompiled_shader.program)); + return std::shared_ptr(new CachedShader( + params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, + precompiled_shader.entries, precompiled_shader.program)); } ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, @@ -336,8 +337,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const bool is_compute = entry.type == ShaderType::Compute; const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - auto locker = MakeLocker(entry); - const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *locker); + auto registry = MakeRegistry(entry); + const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); std::shared_ptr program; if (precompiled_entry) { @@ -354,7 +355,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, PrecompiledShader shader; shader.program = std::move(program); - shader.locker = std::move(locker); + shader.registry = std::move(registry); shader.entries = MakeEntries(ir); std::scoped_lock lock{mutex}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 03d7a2b3f..4935019fc 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -22,7 +22,7 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/shader/const_buffer_locker.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace Core { @@ -45,7 +45,7 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct PrecompiledShader { std::shared_ptr program; - std::shared_ptr locker; + std::shared_ptr registry; ShaderEntries entries; }; @@ -91,10 +91,10 @@ public: private: explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, - std::shared_ptr locker, + std::shared_ptr registry, ShaderEntries entries, std::shared_ptr program); - std::shared_ptr locker; + std::shared_ptr registry; ShaderEntries entries; VAddr cpu_addr = 0; std::size_t size_in_bytes = 0; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 0ce0ea3f8..7f2ab36be 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -19,7 +19,7 @@ #include "common/common_types.h" #include "core/file_sys/vfs_vector.h" #include "video_core/engines/shader_type.h" -#include "video_core/shader/const_buffer_locker.h" +#include "video_core/shader/registry.h" namespace Core { class System; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 144e1e007..ebf85f311 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -161,8 +161,8 @@ CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stag GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset) : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, - program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)}, - shader_ir{this->program_code, main_offset, compiler_settings, locker}, + program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, + shader_ir{this->program_code, main_offset, compiler_settings, registry}, entries{GenerateShaderEntries(shader_ir)} {} CachedShader::~CachedShader() = default; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 92a670cc7..e292526bb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -25,7 +25,7 @@ #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/const_buffer_locker.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" #include "video_core/surface.h" @@ -147,7 +147,7 @@ private: GPUVAddr gpu_addr{}; VAddr cpu_addr{}; ProgramCode program_code; - VideoCommon::Shader::ConstBufferLocker locker; + VideoCommon::Shader::Registry registry; VideoCommon::Shader::ShaderIR shader_ir; ShaderEntries entries; }; diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp deleted file mode 100644 index c859dd7ca..000000000 --- a/src/video_core/shader/const_buffer_locker.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/const_buffer_locker.h" - -namespace VideoCommon::Shader { - -using Tegra::Engines::SamplerDescriptor; - -ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - VideoCore::GuestDriverProfile stored_guest_driver_profile) - : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} - -ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine) - : stage{shader_stage}, engine{&engine} {} - -ConstBufferLocker::~ConstBufferLocker() = default; - -std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = keys.find(key); - if (iter != keys.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); - keys.emplace(key, value); - return value; -} - -std::optional ConstBufferLocker::ObtainBoundSampler(u32 offset) { - const u32 key = offset; - const auto iter = bound_samplers.find(key); - if (iter != bound_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); - bound_samplers.emplace(key, value); - return value; -} - -std::optional ConstBufferLocker::ObtainBindlessSampler( - u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = bindless_samplers.find(key); - if (iter != bindless_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); - bindless_samplers.emplace(key, value); - return value; -} - -std::optional ConstBufferLocker::ObtainBoundBuffer() { - if (bound_buffer_saved) { - return bound_buffer; - } - if (!engine) { - return std::nullopt; - } - bound_buffer_saved = true; - bound_buffer = engine->GetBoundBuffer(); - return bound_buffer; -} - -void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { - keys.insert_or_assign({buffer, offset}, value); -} - -void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { - bound_samplers.insert_or_assign(offset, sampler); -} - -void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { - bindless_samplers.insert_or_assign({buffer, offset}, sampler); -} - -void ConstBufferLocker::SetBoundBuffer(u32 buffer) { - bound_buffer_saved = true; - bound_buffer = buffer; -} - -bool ConstBufferLocker::IsConsistent() const { - if (!engine) { - return true; - } - return std::all_of(keys.begin(), keys.end(), - [this](const auto& pair) { - const auto [cbuf, offset] = pair.first; - const auto value = pair.second; - return value == engine->AccessConstBuffer32(stage, cbuf, offset); - }) && - std::all_of(bound_samplers.begin(), bound_samplers.end(), - [this](const auto& sampler) { - const auto [key, value] = sampler; - return value == engine->AccessBoundSampler(stage, key); - }) && - std::all_of(bindless_samplers.begin(), bindless_samplers.end(), - [this](const auto& sampler) { - const auto [cbuf, offset] = sampler.first; - const auto value = sampler.second; - return value == engine->AccessBindlessSampler(stage, cbuf, offset); - }); -} - -bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { - return std::tie(keys, bound_samplers, bindless_samplers) == - std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h deleted file mode 100644 index 7c6f7bbdd..000000000 --- a/src/video_core/shader/const_buffer_locker.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include "common/common_types.h" -#include "common/hash.h" -#include "video_core/engines/const_buffer_engine_interface.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" - -namespace VideoCommon::Shader { - -using KeyMap = std::unordered_map, u32, Common::PairHash>; -using BoundSamplerMap = std::unordered_map; -using BindlessSamplerMap = - std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; - -/** - * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader - * compiler. with it, the shader can obtain required data from GPU state and store it for disk - * shader compilation. - */ -class ConstBufferLocker { -public: - explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - VideoCore::GuestDriverProfile stored_guest_driver_profile); - - explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine); - - ~ConstBufferLocker(); - - /// Retrieves a key from the locker, if it's registered, it will give the registered value, if - /// not it will obtain it from maxwell3d and register it. - std::optional ObtainKey(u32 buffer, u32 offset); - - std::optional ObtainBoundSampler(u32 offset); - - std::optional ObtainBindlessSampler(u32 buffer, u32 offset); - - std::optional ObtainBoundBuffer(); - - /// Inserts a key. - void InsertKey(u32 buffer, u32 offset, u32 value); - - /// Inserts a bound sampler key. - void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Inserts a bindless sampler key. - void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Set the bound buffer for this locker. - void SetBoundBuffer(u32 buffer); - - /// Checks keys and samplers against engine's current const buffers. Returns true if they are - /// the same value, false otherwise; - bool IsConsistent() const; - - /// Returns true if the keys are equal to the other ones in the locker. - bool HasEqualKeys(const ConstBufferLocker& rhs) const; - - /// Gives an getter to the const buffer keys in the database. - const KeyMap& GetKeys() const { - return keys; - } - - /// Gets samplers database. - const BoundSamplerMap& GetBoundSamplers() const { - return bound_samplers; - } - - /// Gets bindless samplers database. - const BindlessSamplerMap& GetBindlessSamplers() const { - return bindless_samplers; - } - - /// Gets bound buffer used on this shader - u32 GetBoundBuffer() const { - return bound_buffer; - } - - /// Obtains access to the guest driver's profile. - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { - return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; - } - -private: - const Tegra::Engines::ShaderType stage; - VideoCore::GuestDriverProfile stored_guest_driver_profile; - Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; - KeyMap keys; - BoundSamplerMap bound_samplers; - BindlessSamplerMap bindless_samplers; - bool bound_buffer_saved{}; - u32 bound_buffer{}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 0229733b6..2e2711350 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "video_core/shader/ast.h" #include "video_core/shader/control_flow.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -64,11 +65,11 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) - : program_code{program_code}, locker{locker}, start{start} {} + explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry) + : program_code{program_code}, registry{registry}, start{start} {} const ProgramCode& program_code; - ConstBufferLocker& locker; + Registry& registry; u32 start{}; std::vector block_info; std::list inspect_queries; @@ -438,7 +439,7 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) const s32 pc_target = offset + result.relative_position; std::vector branches; for (u32 i = 0; i < result.entries; i++) { - auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); + auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); if (!key) { return {ParseResult::AbnormalFlow, parse_info}; } @@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) { std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, const CompilerSettings& settings, - ConstBufferLocker& locker) { + Registry& registry) { auto result_out = std::make_unique(); if (settings.depth == CompileDepth::BruteForce) { result_out->settings.depth = CompileDepth::BruteForce; return result_out; } - CFGRebuildState state{program_code, start_address, locker}; + CFGRebuildState state{program_code, start_address, registry}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 5304998b9..62a3510d8 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -12,6 +12,7 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/ast.h" #include "video_core/shader/compiler_settings.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -111,6 +112,6 @@ struct ShaderCharacteristics { std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, const CompilerSettings& settings, - ConstBufferLocker& locker); + Registry& registry); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index af4490d66..87ac9ac6c 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -141,7 +141,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); decompiled = false; - auto info = ScanFlow(program_code, main_offset, settings, locker); + auto info = ScanFlow(program_code, main_offset, settings, registry); auto& shader_info = *info; coverage_begin = shader_info.start; coverage_end = shader_info.end; @@ -356,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { void ShaderIR::PostDecode() { // Deduce texture handler size if needed - auto gpu_driver = locker.AccessGuestDriverProfile(); + auto gpu_driver = registry.AccessGuestDriverProfile(); DeduceTextureHandlerSize(gpu_driver, used_samplers); // Deduce Indexed Samplers if (!uses_indexed_samplers) { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index bee7d8cad..48350e042 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -12,6 +12,7 @@ #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional sample if (sampler_info) { return *sampler_info; } - const auto sampler = - buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); + const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset) + : registry.ObtainBoundSampler(offset); if (!sampler) { LOG_WARNING(HW_GPU, "Unknown sampler info"); return SamplerInfo{TextureType::Texture2D, false, false, false}; diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp new file mode 100644 index 000000000..7126caf98 --- /dev/null +++ b/src/video_core/shader/registry.cpp @@ -0,0 +1,127 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" +#include "video_core/shader/registry.h" + +namespace VideoCommon::Shader { + +using Tegra::Engines::SamplerDescriptor; + +Registry::Registry(Tegra::Engines::ShaderType shader_stage, + VideoCore::GuestDriverProfile stored_guest_driver_profile) + : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} + +Registry::Registry(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface& engine) + : stage{shader_stage}, engine{&engine} {} + +Registry::~Registry() = default; + +std::optional Registry::ObtainKey(u32 buffer, u32 offset) { + const std::pair key = {buffer, offset}; + const auto iter = keys.find(key); + if (iter != keys.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); + keys.emplace(key, value); + return value; +} + +std::optional Registry::ObtainBoundSampler(u32 offset) { + const u32 key = offset; + const auto iter = bound_samplers.find(key); + if (iter != bound_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); + bound_samplers.emplace(key, value); + return value; +} + +std::optional Registry::ObtainBindlessSampler(u32 buffer, + u32 offset) { + const std::pair key = {buffer, offset}; + const auto iter = bindless_samplers.find(key); + if (iter != bindless_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); + bindless_samplers.emplace(key, value); + return value; +} + +std::optional Registry::ObtainBoundBuffer() { + if (bound_buffer_saved) { + return bound_buffer; + } + if (!engine) { + return std::nullopt; + } + bound_buffer_saved = true; + bound_buffer = engine->GetBoundBuffer(); + return bound_buffer; +} + +void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { + keys.insert_or_assign({buffer, offset}, value); +} + +void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { + bound_samplers.insert_or_assign(offset, sampler); +} + +void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { + bindless_samplers.insert_or_assign({buffer, offset}, sampler); +} + +void Registry::SetBoundBuffer(u32 buffer) { + bound_buffer_saved = true; + bound_buffer = buffer; +} + +bool Registry::IsConsistent() const { + if (!engine) { + return true; + } + return std::all_of(keys.begin(), keys.end(), + [this](const auto& pair) { + const auto [cbuf, offset] = pair.first; + const auto value = pair.second; + return value == engine->AccessConstBuffer32(stage, cbuf, offset); + }) && + std::all_of(bound_samplers.begin(), bound_samplers.end(), + [this](const auto& sampler) { + const auto [key, value] = sampler; + return value == engine->AccessBoundSampler(stage, key); + }) && + std::all_of(bindless_samplers.begin(), bindless_samplers.end(), + [this](const auto& sampler) { + const auto [cbuf, offset] = sampler.first; + const auto value = sampler.second; + return value == engine->AccessBindlessSampler(stage, cbuf, offset); + }); +} + +bool Registry::HasEqualKeys(const Registry& rhs) const { + return std::tie(keys, bound_samplers, bindless_samplers) == + std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h new file mode 100644 index 000000000..a5487e1d7 --- /dev/null +++ b/src/video_core/shader/registry.h @@ -0,0 +1,102 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "common/common_types.h" +#include "common/hash.h" +#include "video_core/engines/const_buffer_engine_interface.h" +#include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h" + +namespace VideoCommon::Shader { + +using KeyMap = std::unordered_map, u32, Common::PairHash>; +using BoundSamplerMap = std::unordered_map; +using BindlessSamplerMap = + std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; + +/** + * The Registry is a class use to interface the 3D and compute engines with the shader compiler. + * With it, the shader can obtain required data from GPU state and store it for disk shader + * compilation. + */ +class Registry { +public: + explicit Registry(Tegra::Engines::ShaderType shader_stage, + VideoCore::GuestDriverProfile stored_guest_driver_profile); + + explicit Registry(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface& engine); + + ~Registry(); + + /// Retrieves a key from the registry, if it's registered, it will give the registered value, if + /// not it will obtain it from maxwell3d and register it. + std::optional ObtainKey(u32 buffer, u32 offset); + + std::optional ObtainBoundSampler(u32 offset); + + std::optional ObtainBindlessSampler(u32 buffer, u32 offset); + + std::optional ObtainBoundBuffer(); + + /// Inserts a key. + void InsertKey(u32 buffer, u32 offset, u32 value); + + /// Inserts a bound sampler key. + void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + /// Inserts a bindless sampler key. + void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + /// Set the bound buffer for this registry. + void SetBoundBuffer(u32 buffer); + + /// Checks keys and samplers against engine's current const buffers. + /// Returns true if they are the same value, false otherwise. + bool IsConsistent() const; + + /// Returns true if the keys are equal to the other ones in the registry. + bool HasEqualKeys(const Registry& rhs) const; + + /// Gives an getter to the const buffer keys in the database. + const KeyMap& GetKeys() const { + return keys; + } + + /// Gets samplers database. + const BoundSamplerMap& GetBoundSamplers() const { + return bound_samplers; + } + + /// Gets bindless samplers database. + const BindlessSamplerMap& GetBindlessSamplers() const { + return bindless_samplers; + } + + /// Gets bound buffer used on this shader + u32 GetBoundBuffer() const { + return bound_buffer; + } + + /// Obtains access to the guest driver's profile. + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { + return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; + } + +private: + const Tegra::Engines::ShaderType stage; + VideoCore::GuestDriverProfile stored_guest_driver_profile; + Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; + KeyMap keys; + BoundSamplerMap bound_samplers; + BindlessSamplerMap bindless_samplers; + bool bound_buffer_saved{}; + u32 bound_buffer{}; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 3a5d280a9..425927777 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -11,6 +11,7 @@ #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation; using Tegra::Shader::Register; ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, - ConstBufferLocker& locker) - : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { + Registry& registry) + : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} { Decode(); PostDecode(); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index b0851c3be..dde036b40 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -18,8 +18,8 @@ #include "video_core/engines/shader_header.h" #include "video_core/shader/ast.h" #include "video_core/shader/compiler_settings.h" -#include "video_core/shader/const_buffer_locker.h" #include "video_core/shader/node.h" +#include "video_core/shader/registry.h" namespace VideoCommon::Shader { @@ -69,7 +69,7 @@ struct GlobalMemoryUsage { class ShaderIR final { public: explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, - ConstBufferLocker& locker); + Registry& registry); ~ShaderIR(); const std::map& GetBasicBlocks() const { @@ -414,7 +414,7 @@ private: const ProgramCode& program_code; const u32 main_offset; const CompilerSettings settings; - ConstBufferLocker& locker; + Registry& registry; bool decompiled{}; bool disable_flow_stack{}; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index b1a0aa00c..831219841 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -81,7 +81,7 @@ std::tuple ShaderIR::TrackBindlessSampler(Node tracked, cons MakeTrackSampler(cbuf->GetIndex(), immediate->GetValue()); return {tracked, track}; } else if (const auto operation = std::get_if(&*offset)) { - auto bound_buffer = locker.ObtainBoundBuffer(); + const auto bound_buffer = registry.ObtainBoundBuffer(); if (!bound_buffer) { return {}; } @@ -94,7 +94,7 @@ std::tuple ShaderIR::TrackBindlessSampler(Node tracked, cons } auto [gpr, base_offset] = *pair; const auto offset_inm = std::get_if(&*base_offset); - const auto& gpu_driver = locker.AccessGuestDriverProfile(); + const auto& gpu_driver = registry.AccessGuestDriverProfile(); const u32 bindless_cv = NewCustomVariable(); const Node op = Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); -- cgit v1.2.3 From 0528be5c92db67b608dc64322c55e57629c80619 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Feb 2020 03:49:51 -0300 Subject: shader/registry: Store graphics and compute metadata Store information GLSL forces us to provide but it's dynamic state in hardware (workgroup sizes, primitive topology, shared memory size). --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 28 ++++---- .../renderer_opengl/gl_shader_decompiler.cpp | 84 +++++++++++++++++----- .../renderer_opengl/gl_shader_decompiler.h | 6 +- .../renderer_opengl/gl_shader_disk_cache.cpp | 12 ++-- .../renderer_opengl/gl_shader_disk_cache.h | 4 +- src/video_core/shader/registry.cpp | 59 +++++++++------ src/video_core/shader/registry.h | 49 ++++++++++--- src/video_core/shader/track.cpp | 9 +-- 8 files changed, 176 insertions(+), 75 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 87d25b5a5..72a5dc82a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -166,8 +166,9 @@ std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { std::shared_ptr MakeRegistry(const ShaderDiskCacheEntry& entry) { const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; - auto registry = std::make_shared(entry.type, guest_profile); - registry->SetBoundBuffer(entry.bound_buffer); + const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, + entry.graphics_info, entry.compute_info}; + const auto registry = std::make_shared(entry.type, info); for (const auto& [address, value] : entry.keys) { const auto [buffer, offset] = address; registry->InsertKey(buffer, offset, value); @@ -184,9 +185,9 @@ std::shared_ptr MakeRegistry(const ShaderDiskCacheEntry& entry) { std::shared_ptr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, const ShaderIR& ir, - bool hint_retrievable = false) { + const Registry& registry, bool hint_retrievable = false) { LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type)); - const std::string glsl = DecompileShader(device, ir, shader_type); + const std::string glsl = DecompileShader(device, ir, registry, shader_type); OGLShader shader; shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); @@ -239,7 +240,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, // if (!code_b.empty()) { // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); // } - auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir); + auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); ShaderDiskCacheEntry entry; entry.type = shader_type; @@ -247,6 +248,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, entry.code_b = std::move(code_b); entry.unique_identifier = params.unique_identifier; entry.bound_buffer = registry->GetBoundBuffer(); + entry.graphics_info = registry->GetGraphicsInfo(); entry.keys = registry->GetKeys(); entry.bound_samplers = registry->GetBoundSamplers(); entry.bindless_samplers = registry->GetBindlessSamplers(); @@ -260,16 +262,18 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto registry = - std::make_shared(ShaderType::Compute, params.system.GPU().KeplerCompute()); + auto& engine = params.system.GPU().KeplerCompute(); + auto registry = std::make_shared(ShaderType::Compute, engine); const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - auto program = BuildShader(params.device, ShaderType::Compute, params.unique_identifier, ir); + const u64 uid = params.unique_identifier; + auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); ShaderDiskCacheEntry entry; entry.type = ShaderType::Compute; entry.code = std::move(code); - entry.unique_identifier = params.unique_identifier; + entry.unique_identifier = uid; entry.bound_buffer = registry->GetBoundBuffer(); + entry.compute_info = registry->GetComputeInfo(); entry.keys = registry->GetKeys(); entry.bound_samplers = registry->GetBoundSamplers(); entry.bindless_samplers = registry->GetBindlessSamplers(); @@ -331,8 +335,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } const auto& entry = (*transferable)[i]; - const u64 unique_identifier = entry.unique_identifier; - const auto it = find_precompiled(unique_identifier); + const u64 uid = entry.unique_identifier; + const auto it = find_precompiled(uid); const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; const bool is_compute = entry.type == ShaderType::Compute; @@ -350,7 +354,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } if (!program) { // Otherwise compile it from GLSL - program = BuildShader(device, entry.type, unique_identifier, ir, true); + program = BuildShader(device, entry.type, uid, ir, *registry, true); } PrecompiledShader shader; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 308e57aae..48a25f1f8 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -36,6 +36,7 @@ using Tegra::Shader::IpaInterpMode; using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; using Tegra::Shader::Register; +using VideoCommon::Shader::Registry; using namespace std::string_literals; using namespace VideoCommon::Shader; @@ -288,6 +289,30 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { } } +/// Describes primitive behavior on geometry shaders +std::pair GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { + switch (topology) { + case Maxwell::PrimitiveTopology::Points: + return {"points", 1}; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineStrip: + return {"lines", 2}; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + return {"lines_adjacency", 4}; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + return {"triangles", 3}; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + return {"triangles_adjacency", 6}; + default: + UNIMPLEMENTED_MSG("topology={}", static_cast(topology)); + return {"points", 1}; + } +} + /// Generates code to use for a swizzle operation. constexpr const char* GetSwizzle(std::size_t element) { constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; @@ -367,15 +392,17 @@ std::string FlowStackTopName(MetaStackClass stack) { class GLSLDecompiler final { public: - explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, - std::string_view suffix) - : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} + explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, + ShaderType stage, std::string_view suffix) + : device{device}, ir{ir}, registry{registry}, stage{stage}, suffix{suffix}, + header{ir.GetHeader()} {} void Decompile() { DeclareHeader(); DeclareVertex(); DeclareGeometry(); DeclareFragment(); + DeclareCompute(); DeclareRegisters(); DeclareCustomVariables(); DeclarePredicates(); @@ -489,9 +516,15 @@ private: return; } + const auto& info = registry.GetGraphicsInfo(); + const auto input_topology = info.primitive_topology; + const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); + max_input_vertices = max_vertices; + code.AddLine("layout ({}) in;", glsl_topology); + const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_vertices = header.common4.max_output_vertices.Value(); - code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); + const auto max_output_vertices = header.common4.max_output_vertices.Value(); + code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); code.AddNewLine(); code.AddLine("in gl_PerVertex {{"); @@ -513,7 +546,8 @@ private: if (!IsRenderTargetEnabled(render_target)) { continue; } - code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target, render_target); + code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target, + render_target); any = true; } if (any) { @@ -521,6 +555,20 @@ private: } } + void DeclareCompute() { + if (stage != ShaderType::Compute) { + return; + } + const auto& info = registry.GetComputeInfo(); + if (const u32 size = info.shared_memory_size_in_words; size > 0) { + code.AddLine("shared uint smem[];", size); + code.AddNewLine(); + } + code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", + info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); + code.AddNewLine(); + } + void DeclareVertexRedeclarations() { code.AddLine("out gl_PerVertex {{"); ++code.scope; @@ -596,18 +644,16 @@ private: } void DeclareLocalMemory() { + u64 local_memory_size = 0; if (stage == ShaderType::Compute) { - code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); - code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); - code.AddLine("#endif"); - return; + local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; + } else { + local_memory_size = header.GetLocalMemorySize(); } - - const u64 local_memory_size = header.GetLocalMemorySize(); if (local_memory_size == 0) { return; } - const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; + const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); code.AddNewLine(); } @@ -996,7 +1042,8 @@ private: // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games // set an 0x80000000 index for those and the shader fails to build. Find out why // this happens and what's its intent. - return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); + return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), + max_input_vertices.value()); } return std::string(name); }; @@ -2428,11 +2475,14 @@ private: const Device& device; const ShaderIR& ir; + const Registry& registry; const ShaderType stage; const std::string_view suffix; const Header header; ShaderWriter code; + + std::optional max_input_vertices; }; std::string GetFlowVariable(u32 index) { @@ -2647,9 +2697,9 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { return entries; } -std::string DecompileShader(const Device& device, const ShaderIR& ir, ShaderType stage, - std::string_view suffix) { - GLSLDecompiler decompiler(device, ir, stage, suffix); +std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, + ShaderType stage, std::string_view suffix) { + GLSLDecompiler decompiler(device, ir, registry, stage, suffix); decompiler.Decompile(); return decompiler.GetResult(); } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index ae97ab504..68b68ee77 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -12,12 +12,9 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" -namespace VideoCommon::Shader { -class ShaderIR; -} - namespace OpenGL { class Device; @@ -80,6 +77,7 @@ struct ShaderEntries { ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, + const VideoCommon::Shader::Registry& registry, Tegra::Engines::ShaderType stage, std::string_view suffix = {}); } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 0e1717c5e..5d5118058 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -48,7 +48,7 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler; }; -constexpr u32 NativeVersion = 16; +constexpr u32 NativeVersion = 17; ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; @@ -83,15 +83,16 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { return false; } - bool is_texture_handler_size_known; + u8 is_texture_handler_size_known; u32 texture_handler_size_value; u32 num_keys; u32 num_bound_samplers; u32 num_bindless_samplers; if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || file.ReadArray(&is_texture_handler_size_known, 1) != 1 || - file.ReadArray(&texture_handler_size_value, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || - file.ReadArray(&num_bound_samplers, 1) != 1 || + file.ReadArray(&texture_handler_size_value, 1) != 1 || + file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || + file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || file.ReadArray(&num_bindless_samplers, 1) != 1) { return false; } @@ -136,8 +137,9 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { } if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 || - file.WriteObject(texture_handler_size.has_value()) != 1 || + file.WriteObject(static_cast(texture_handler_size.has_value())) != 1 || file.WriteObject(texture_handler_size.value_or(0)) != 1 || + file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || file.WriteObject(static_cast(keys.size())) != 1 || file.WriteObject(static_cast(bound_samplers.size())) != 1 || file.WriteObject(static_cast(bindless_samplers.size())) != 1) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 7f2ab36be..d5be52e40 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -51,8 +51,10 @@ struct ShaderDiskCacheEntry { ProgramCode code_b; u64 unique_identifier = 0; - u32 bound_buffer = 0; std::optional texture_handler_size; + u32 bound_buffer = 0; + VideoCommon::Shader::GraphicsInfo graphics_info; + VideoCommon::Shader::ComputeInfo compute_info; VideoCommon::Shader::KeyMap keys; VideoCommon::Shader::BoundSamplerMap bound_samplers; VideoCommon::Shader::BindlessSamplerMap bindless_samplers; diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index 7126caf98..dc2d3dce3 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -6,21 +6,55 @@ #include #include "common/common_types.h" +#include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/shader/registry.h" namespace VideoCommon::Shader { +using Tegra::Engines::ConstBufferEngineInterface; using Tegra::Engines::SamplerDescriptor; +using Tegra::Engines::ShaderType; -Registry::Registry(Tegra::Engines::ShaderType shader_stage, - VideoCore::GuestDriverProfile stored_guest_driver_profile) - : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} +namespace { + +GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { + if (shader_stage == ShaderType::Compute) { + return {}; + } + auto& graphics = static_cast(engine); + + GraphicsInfo info; + info.primitive_topology = graphics.regs.draw.topology; + return info; +} + +ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { + if (shader_stage != ShaderType::Compute) { + return {}; + } + auto& compute = static_cast(engine); + const auto& launch = compute.launch_description; + + ComputeInfo info; + info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; + info.local_memory_size_in_words = launch.local_pos_alloc; + info.shared_memory_size_in_words = launch.shared_alloc; + return info; +} + +} // Anonymous namespace + +Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) + : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, + bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} Registry::Registry(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine) - : stage{shader_stage}, engine{&engine} {} + : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, + graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( + shader_stage, engine)} {} Registry::~Registry() = default; @@ -67,18 +101,6 @@ std::optional Registry::ObtainBindlessSampler return value; } -std::optional Registry::ObtainBoundBuffer() { - if (bound_buffer_saved) { - return bound_buffer; - } - if (!engine) { - return std::nullopt; - } - bound_buffer_saved = true; - bound_buffer = engine->GetBoundBuffer(); - return bound_buffer; -} - void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { keys.insert_or_assign({buffer, offset}, value); } @@ -91,11 +113,6 @@ void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor s bindless_samplers.insert_or_assign({buffer, offset}, sampler); } -void Registry::SetBoundBuffer(u32 buffer) { - bound_buffer_saved = true; - bound_buffer = buffer; -} - bool Registry::IsConsistent() const { if (!engine) { return true; diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index a5487e1d7..c1a04ea02 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -4,11 +4,16 @@ #pragma once +#include #include +#include #include +#include + #include "common/common_types.h" #include "common/hash.h" #include "video_core/engines/const_buffer_engine_interface.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/guest_driver.h" @@ -19,6 +24,25 @@ using BoundSamplerMap = std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; +struct GraphicsInfo { + Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology primitive_topology{}; +}; +static_assert(std::is_trivially_copyable_v); + +struct ComputeInfo { + std::array workgroup_size{}; + u32 shared_memory_size_in_words = 0; + u32 local_memory_size_in_words = 0; +}; +static_assert(std::is_trivially_copyable_v); + +struct SerializedRegistryInfo { + VideoCore::GuestDriverProfile guest_driver_profile; + u32 bound_buffer = 0; + GraphicsInfo graphics; + ComputeInfo compute; +}; + /** * The Registry is a class use to interface the 3D and compute engines with the shader compiler. * With it, the shader can obtain required data from GPU state and store it for disk shader @@ -26,8 +50,7 @@ using BindlessSamplerMap = */ class Registry { public: - explicit Registry(Tegra::Engines::ShaderType shader_stage, - VideoCore::GuestDriverProfile stored_guest_driver_profile); + explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); explicit Registry(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine); @@ -42,8 +65,6 @@ public: std::optional ObtainBindlessSampler(u32 buffer, u32 offset); - std::optional ObtainBoundBuffer(); - /// Inserts a key. void InsertKey(u32 buffer, u32 offset, u32 value); @@ -53,9 +74,6 @@ public: /// Inserts a bindless sampler key. void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - /// Set the bound buffer for this registry. - void SetBoundBuffer(u32 buffer); - /// Checks keys and samplers against engine's current const buffers. /// Returns true if they are the same value, false otherwise. bool IsConsistent() const; @@ -83,6 +101,18 @@ public: return bound_buffer; } + /// Returns compute information from this shader + const GraphicsInfo& GetGraphicsInfo() const { + ASSERT(stage != Tegra::Engines::ShaderType::Compute); + return graphics_info; + } + + /// Returns compute information from this shader + const ComputeInfo& GetComputeInfo() const { + ASSERT(stage == Tegra::Engines::ShaderType::Compute); + return compute_info; + } + /// Obtains access to the guest driver's profile. VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; @@ -95,8 +125,9 @@ private: KeyMap keys; BoundSamplerMap bound_samplers; BindlessSamplerMap bindless_samplers; - bool bound_buffer_saved{}; - u32 bound_buffer{}; + u32 bound_buffer; + GraphicsInfo graphics_info; + ComputeInfo compute_info; }; } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 831219841..10739b37d 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -81,14 +81,11 @@ std::tuple ShaderIR::TrackBindlessSampler(Node tracked, cons MakeTrackSampler(cbuf->GetIndex(), immediate->GetValue()); return {tracked, track}; } else if (const auto operation = std::get_if(&*offset)) { - const auto bound_buffer = registry.ObtainBoundBuffer(); - if (!bound_buffer) { + const u32 bound_buffer = registry.GetBoundBuffer(); + if (bound_buffer != cbuf->GetIndex()) { return {}; } - if (*bound_buffer != cbuf->GetIndex()) { - return {}; - } - auto pair = DecoupleIndirectRead(*operation); + const auto pair = DecoupleIndirectRead(*operation); if (!pair) { return {}; } -- cgit v1.2.3 From 66a8a3e88719aaa65a96dd0289e1fb151d199d9b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Feb 2020 04:03:22 -0300 Subject: shader/registry: Cache tessellation state --- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 2 +- src/video_core/shader/registry.cpp | 3 +++ src/video_core/shader/registry.h | 8 ++++++-- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 5d5118058..df86c0cc3 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -48,7 +48,7 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler; }; -constexpr u32 NativeVersion = 17; +constexpr u32 NativeVersion = 18; ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index dc2d3dce3..90dfab293 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -27,6 +27,9 @@ GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterfac GraphicsInfo info; info.primitive_topology = graphics.regs.draw.topology; + info.tessellation_primitive = graphics.regs.tess_mode.prim; + info.tessellation_spacing = graphics.regs.tess_mode.spacing; + info.tessellation_clockwise = graphics.regs.tess_mode.cw; return info; } diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index c1a04ea02..7b7fad3d1 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -26,15 +26,19 @@ using BindlessSamplerMap = struct GraphicsInfo { Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology primitive_topology{}; + Tegra::Engines::Maxwell3D::Regs::TessellationPrimitive tessellation_primitive{}; + Tegra::Engines::Maxwell3D::Regs::TessellationSpacing tessellation_spacing{}; + bool tessellation_clockwise = false; }; -static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_copyable_v && + std::is_standard_layout_v); struct ComputeInfo { std::array workgroup_size{}; u32 shared_memory_size_in_words = 0; u32 local_memory_size_in_words = 0; }; -static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_copyable_v && std::is_standard_layout_v); struct SerializedRegistryInfo { VideoCore::GuestDriverProfile guest_driver_profile; -- cgit v1.2.3 From b1acb4f73f79a555480d1405bc9732cab111f6e2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 2 Mar 2020 01:08:10 -0300 Subject: shader/registry: Address feedback --- .../renderer_opengl/gl_shader_decompiler.cpp | 2 +- src/video_core/shader/registry.cpp | 11 +++++++++++ src/video_core/shader/registry.h | 18 ++++++------------ 3 files changed, 18 insertions(+), 13 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index cb89daba1..0108e708c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -57,7 +57,7 @@ using TextureIR = std::variant constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); -std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt +constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt #define ftou floatBitsToUint #define itof intBitsToFloat #define utof uintBitsToFloat diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index 90dfab293..4a1e16c1e 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -5,6 +5,7 @@ #include #include +#include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" @@ -144,4 +145,14 @@ bool Registry::HasEqualKeys(const Registry& rhs) const { std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); } +const GraphicsInfo& Registry::GetGraphicsInfo() const { + ASSERT(stage != Tegra::Engines::ShaderType::Compute); + return graphics_info; +} + +const ComputeInfo& Registry::GetComputeInfo() const { + ASSERT(stage == Tegra::Engines::ShaderType::Compute); + return compute_info; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index 7b7fad3d1..07998c4db 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -85,6 +85,12 @@ public: /// Returns true if the keys are equal to the other ones in the registry. bool HasEqualKeys(const Registry& rhs) const; + /// Returns graphics information from this shader + const GraphicsInfo& GetGraphicsInfo() const; + + /// Returns compute information from this shader + const ComputeInfo& GetComputeInfo() const; + /// Gives an getter to the const buffer keys in the database. const KeyMap& GetKeys() const { return keys; @@ -105,18 +111,6 @@ public: return bound_buffer; } - /// Returns compute information from this shader - const GraphicsInfo& GetGraphicsInfo() const { - ASSERT(stage != Tegra::Engines::ShaderType::Compute); - return graphics_info; - } - - /// Returns compute information from this shader - const ComputeInfo& GetComputeInfo() const { - ASSERT(stage == Tegra::Engines::ShaderType::Compute); - return compute_info; - } - /// Obtains access to the guest driver's profile. VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; -- cgit v1.2.3 From eb5861e0a22851cd2b2ca38136bfc7870790836e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 2 Mar 2020 01:54:00 -0300 Subject: engines/maxwell_3d: Add TFB registers and store them in shader registry --- src/video_core/engines/maxwell_3d.h | 34 ++++++++++++++++++++-- .../renderer_opengl/gl_shader_disk_cache.cpp | 2 +- src/video_core/shader/registry.cpp | 3 ++ src/video_core/shader/registry.h | 12 ++++++-- 4 files changed, 45 insertions(+), 6 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 491cff370..7000b0589 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -67,6 +67,7 @@ public: static constexpr std::size_t NumVaryings = 31; static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number static constexpr std::size_t NumClipDistances = 8; + static constexpr std::size_t NumTransformFeedbackBuffers = 4; static constexpr std::size_t MaxShaderProgram = 6; static constexpr std::size_t MaxShaderStage = 5; // Maximum number of const buffers per shader stage. @@ -621,6 +622,22 @@ public: float depth_range_far; }; + struct alignas(32) TransformFeedbackBinding { + u32 buffer_enable; + u32 address_high; + u32 address_low; + s32 buffer_size; + s32 buffer_offset; + }; + static_assert(sizeof(TransformFeedbackBinding) == 32); + + struct alignas(16) TransformFeedbackLayout { + u32 stream; + u32 varying_count; + u32 stride; + }; + static_assert(sizeof(TransformFeedbackLayout) == 16); + bool IsShaderConfigEnabled(std::size_t index) const { // The VertexB is always enabled. if (index == static_cast(Regs::ShaderProgram::VertexB)) { @@ -677,7 +694,13 @@ public: u32 rasterize_enable; - INSERT_UNION_PADDING_WORDS(0xF1); + std::array tfb_bindings; + + INSERT_UNION_PADDING_WORDS(0xC0); + + std::array tfb_layouts; + + INSERT_UNION_PADDING_WORDS(0x1); u32 tfb_enabled; @@ -1187,7 +1210,11 @@ public: u32 tex_cb_index; - INSERT_UNION_PADDING_WORDS(0x395); + INSERT_UNION_PADDING_WORDS(0x7D); + + std::array, NumTransformFeedbackBuffers> tfb_varying_locs; + + INSERT_UNION_PADDING_WORDS(0x298); struct { /// Compressed address of a buffer that holds information about bound SSBOs. @@ -1413,6 +1440,8 @@ ASSERT_REG_POSITION(tess_mode, 0xC8); ASSERT_REG_POSITION(tess_level_outer, 0xC9); ASSERT_REG_POSITION(tess_level_inner, 0xCD); ASSERT_REG_POSITION(rasterize_enable, 0xDF); +ASSERT_REG_POSITION(tfb_bindings, 0xE0); +ASSERT_REG_POSITION(tfb_layouts, 0x1C0); ASSERT_REG_POSITION(tfb_enabled, 0x1D1); ASSERT_REG_POSITION(rt, 0x200); ASSERT_REG_POSITION(viewport_transform, 0x280); @@ -1508,6 +1537,7 @@ ASSERT_REG_POSITION(firmware, 0x8C0); ASSERT_REG_POSITION(const_buffer, 0x8E0); ASSERT_REG_POSITION(cb_bind[0], 0x904); ASSERT_REG_POSITION(tex_cb_index, 0x982); +ASSERT_REG_POSITION(tfb_varying_locs, 0xA00); ASSERT_REG_POSITION(ssbo_info, 0xD18); ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A); ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F); diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 3b0db5393..9e95a122b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -48,7 +48,7 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler; }; -constexpr u32 NativeVersion = 19; +constexpr u32 NativeVersion = 20; ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index 4a1e16c1e..af70b3f35 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -27,9 +27,12 @@ GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterfac auto& graphics = static_cast(engine); GraphicsInfo info; + info.tfb_layouts = graphics.regs.tfb_layouts; + info.tfb_varying_locs = graphics.regs.tfb_varying_locs; info.primitive_topology = graphics.regs.draw.topology; info.tessellation_primitive = graphics.regs.tess_mode.prim; info.tessellation_spacing = graphics.regs.tess_mode.spacing; + info.tfb_enabled = graphics.regs.tfb_enabled; info.tessellation_clockwise = graphics.regs.tess_mode.cw; return info; } diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index 07998c4db..0c80d35fd 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -25,9 +25,15 @@ using BindlessSamplerMap = std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; struct GraphicsInfo { - Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology primitive_topology{}; - Tegra::Engines::Maxwell3D::Regs::TessellationPrimitive tessellation_primitive{}; - Tegra::Engines::Maxwell3D::Regs::TessellationSpacing tessellation_spacing{}; + using Maxwell = Tegra::Engines::Maxwell3D::Regs; + + std::array + tfb_layouts{}; + std::array, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; + Maxwell::PrimitiveTopology primitive_topology{}; + Maxwell::TessellationPrimitive tessellation_primitive{}; + Maxwell::TessellationSpacing tessellation_spacing{}; + bool tfb_enabled = false; bool tessellation_clockwise = false; }; static_assert(std::is_trivially_copyable_v && -- cgit v1.2.3